Kernel  |  4.1

下载     查看原文件
C++程序  |  1357行  |  34.09 KB
/* Management of Tx window, Tx resend, ACKs and out-of-sequence reception
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/circ_buf.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/udp.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"

/*
 * How long to wait before scheduling ACK generation after seeing a
 * packet with RXRPC_REQUEST_ACK set (in jiffies).
 */
unsigned rxrpc_requested_ack_delay = 1;

/*
 * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
 *
 * We use this when we've received new data packets.  If those packets aren't
 * all consumed within this time we will send a DELAY ACK if an ACK was not
 * requested to let the sender know it doesn't need to resend.
 */
unsigned rxrpc_soft_ack_delay = 1 * HZ;

/*
 * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
 *
 * We use this when we've consumed some previously soft-ACK'd packets when
 * further packets aren't immediately received to decide when to send an IDLE
 * ACK let the other end know that it can free up its Tx buffer space.
 */
unsigned rxrpc_idle_ack_delay = 0.5 * HZ;

/*
 * Receive window size in packets.  This indicates the maximum number of
 * unconsumed received packets we're willing to retain in memory.  Once this
 * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
 * packets.
 */
unsigned rxrpc_rx_window_size = 32;

/*
 * Maximum Rx MTU size.  This indicates to the sender the size of jumbo packet
 * made by gluing normal packets together that we're willing to handle.
 */
unsigned rxrpc_rx_mtu = 5692;

/*
 * The maximum number of fragments in a received jumbo packet that we tell the
 * sender that we're willing to handle.
 */
unsigned rxrpc_rx_jumbo_max = 4;

static const char *rxrpc_acks(u8 reason)
{
	static const char *const str[] = {
		"---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY",
		"IDL", "-?-"
	};

	if (reason >= ARRAY_SIZE(str))
		reason = ARRAY_SIZE(str) - 1;
	return str[reason];
}

static const s8 rxrpc_ack_priority[] = {
	[0]				= 0,
	[RXRPC_ACK_DELAY]		= 1,
	[RXRPC_ACK_REQUESTED]		= 2,
	[RXRPC_ACK_IDLE]		= 3,
	[RXRPC_ACK_PING_RESPONSE]	= 4,
	[RXRPC_ACK_DUPLICATE]		= 5,
	[RXRPC_ACK_OUT_OF_SEQUENCE]	= 6,
	[RXRPC_ACK_EXCEEDS_WINDOW]	= 7,
	[RXRPC_ACK_NOSPACE]		= 8,
};

/*
 * propose an ACK be sent
 */
void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
			 __be32 serial, bool immediate)
{
	unsigned long expiry;
	s8 prior = rxrpc_ack_priority[ack_reason];

	ASSERTCMP(prior, >, 0);

	_enter("{%d},%s,%%%x,%u",
	       call->debug_id, rxrpc_acks(ack_reason), ntohl(serial),
	       immediate);

	if (prior < rxrpc_ack_priority[call->ackr_reason]) {
		if (immediate)
			goto cancel_timer;
		return;
	}

	/* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
	 * numbers */
	if (prior == rxrpc_ack_priority[call->ackr_reason]) {
		if (prior <= 4)
			call->ackr_serial = serial;
		if (immediate)
			goto cancel_timer;
		return;
	}

	call->ackr_reason = ack_reason;
	call->ackr_serial = serial;

	switch (ack_reason) {
	case RXRPC_ACK_DELAY:
		_debug("run delay timer");
		expiry = rxrpc_soft_ack_delay;
		goto run_timer;

	case RXRPC_ACK_IDLE:
		if (!immediate) {
			_debug("run defer timer");
			expiry = rxrpc_idle_ack_delay;
			goto run_timer;
		}
		goto cancel_timer;

	case RXRPC_ACK_REQUESTED:
		expiry = rxrpc_requested_ack_delay;
		if (!expiry)
			goto cancel_timer;
		if (!immediate || serial == cpu_to_be32(1)) {
			_debug("run defer timer");
			goto run_timer;
		}

	default:
		_debug("immediate ACK");
		goto cancel_timer;
	}

run_timer:
	expiry += jiffies;
	if (!timer_pending(&call->ack_timer) ||
	    time_after(call->ack_timer.expires, expiry))
		mod_timer(&call->ack_timer, expiry);
	return;

cancel_timer:
	_debug("cancel timer %%%u", ntohl(serial));
	try_to_del_timer_sync(&call->ack_timer);
	read_lock_bh(&call->state_lock);
	if (call->state <= RXRPC_CALL_COMPLETE &&
	    !test_and_set_bit(RXRPC_CALL_ACK, &call->events))
		rxrpc_queue_call(call);
	read_unlock_bh(&call->state_lock);
}

/*
 * propose an ACK be sent, locking the call structure
 */
void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
		       __be32 serial, bool immediate)
{
	s8 prior = rxrpc_ack_priority[ack_reason];

	if (prior > rxrpc_ack_priority[call->ackr_reason]) {
		spin_lock_bh(&call->lock);
		__rxrpc_propose_ACK(call, ack_reason, serial, immediate);
		spin_unlock_bh(&call->lock);
	}
}

/*
 * set the resend timer
 */
static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
			     unsigned long resend_at)
{
	read_lock_bh(&call->state_lock);
	if (call->state >= RXRPC_CALL_COMPLETE)
		resend = 0;

	if (resend & 1) {
		_debug("SET RESEND");
		set_bit(RXRPC_CALL_RESEND, &call->events);
	}

	if (resend & 2) {
		_debug("MODIFY RESEND TIMER");
		set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
		mod_timer(&call->resend_timer, resend_at);
	} else {
		_debug("KILL RESEND TIMER");
		del_timer_sync(&call->resend_timer);
		clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
	}
	read_unlock_bh(&call->state_lock);
}

/*
 * resend packets
 */
static void rxrpc_resend(struct rxrpc_call *call)
{
	struct rxrpc_skb_priv *sp;
	struct rxrpc_header *hdr;
	struct sk_buff *txb;
	unsigned long *p_txb, resend_at;
	bool stop;
	int loop;
	u8 resend;

	_enter("{%d,%d,%d,%d},",
	       call->acks_hard, call->acks_unacked,
	       atomic_read(&call->sequence),
	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));

	stop = false;
	resend = 0;
	resend_at = 0;

	for (loop = call->acks_tail;
	     loop != call->acks_head || stop;
	     loop = (loop + 1) &  (call->acks_winsz - 1)
	     ) {
		p_txb = call->acks_window + loop;
		smp_read_barrier_depends();
		if (*p_txb & 1)
			continue;

		txb = (struct sk_buff *) *p_txb;
		sp = rxrpc_skb(txb);

		if (sp->need_resend) {
			sp->need_resend = false;

			/* each Tx packet has a new serial number */
			sp->hdr.serial =
				htonl(atomic_inc_return(&call->conn->serial));

			hdr = (struct rxrpc_header *) txb->head;
			hdr->serial = sp->hdr.serial;

			_proto("Tx DATA %%%u { #%d }",
			       ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
			if (rxrpc_send_packet(call->conn->trans, txb) < 0) {
				stop = true;
				sp->resend_at = jiffies + 3;
			} else {
				sp->resend_at =
					jiffies + rxrpc_resend_timeout;
			}
		}

		if (time_after_eq(jiffies + 1, sp->resend_at)) {
			sp->need_resend = true;
			resend |= 1;
		} else if (resend & 2) {
			if (time_before(sp->resend_at, resend_at))
				resend_at = sp->resend_at;
		} else {
			resend_at = sp->resend_at;
			resend |= 2;
		}
	}

	rxrpc_set_resend(call, resend, resend_at);
	_leave("");
}

/*
 * handle resend timer expiry
 */
static void rxrpc_resend_timer(struct rxrpc_call *call)
{
	struct rxrpc_skb_priv *sp;
	struct sk_buff *txb;
	unsigned long *p_txb, resend_at;
	int loop;
	u8 resend;

	_enter("%d,%d,%d",
	       call->acks_tail, call->acks_unacked, call->acks_head);

	if (call->state >= RXRPC_CALL_COMPLETE)
		return;

	resend = 0;
	resend_at = 0;

	for (loop = call->acks_unacked;
	     loop != call->acks_head;
	     loop = (loop + 1) &  (call->acks_winsz - 1)
	     ) {
		p_txb = call->acks_window + loop;
		smp_read_barrier_depends();
		txb = (struct sk_buff *) (*p_txb & ~1);
		sp = rxrpc_skb(txb);

		ASSERT(!(*p_txb & 1));

		if (sp->need_resend) {
			;
		} else if (time_after_eq(jiffies + 1, sp->resend_at)) {
			sp->need_resend = true;
			resend |= 1;
		} else if (resend & 2) {
			if (time_before(sp->resend_at, resend_at))
				resend_at = sp->resend_at;
		} else {
			resend_at = sp->resend_at;
			resend |= 2;
		}
	}

	rxrpc_set_resend(call, resend, resend_at);
	_leave("");
}

/*
 * process soft ACKs of our transmitted packets
 * - these indicate packets the peer has or has not received, but hasn't yet
 *   given to the consumer, and so can still be discarded and re-requested
 */
static int rxrpc_process_soft_ACKs(struct rxrpc_call *call,
				   struct rxrpc_ackpacket *ack,
				   struct sk_buff *skb)
{
	struct rxrpc_skb_priv *sp;
	struct sk_buff *txb;
	unsigned long *p_txb, resend_at;
	int loop;
	u8 sacks[RXRPC_MAXACKS], resend;

	_enter("{%d,%d},{%d},",
	       call->acks_hard,
	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz),
	       ack->nAcks);

	if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0)
		goto protocol_error;

	resend = 0;
	resend_at = 0;
	for (loop = 0; loop < ack->nAcks; loop++) {
		p_txb = call->acks_window;
		p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1);
		smp_read_barrier_depends();
		txb = (struct sk_buff *) (*p_txb & ~1);
		sp = rxrpc_skb(txb);

		switch (sacks[loop]) {
		case RXRPC_ACK_TYPE_ACK:
			sp->need_resend = false;
			*p_txb |= 1;
			break;
		case RXRPC_ACK_TYPE_NACK:
			sp->need_resend = true;
			*p_txb &= ~1;
			resend = 1;
			break;
		default:
			_debug("Unsupported ACK type %d", sacks[loop]);
			goto protocol_error;
		}
	}

	smp_mb();
	call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1);

	/* anything not explicitly ACK'd is implicitly NACK'd, but may just not
	 * have been received or processed yet by the far end */
	for (loop = call->acks_unacked;
	     loop != call->acks_head;
	     loop = (loop + 1) &  (call->acks_winsz - 1)
	     ) {
		p_txb = call->acks_window + loop;
		smp_read_barrier_depends();
		txb = (struct sk_buff *) (*p_txb & ~1);
		sp = rxrpc_skb(txb);

		if (*p_txb & 1) {
			/* packet must have been discarded */
			sp->need_resend = true;
			*p_txb &= ~1;
			resend |= 1;
		} else if (sp->need_resend) {
			;
		} else if (time_after_eq(jiffies + 1, sp->resend_at)) {
			sp->need_resend = true;
			resend |= 1;
		} else if (resend & 2) {
			if (time_before(sp->resend_at, resend_at))
				resend_at = sp->resend_at;
		} else {
			resend_at = sp->resend_at;
			resend |= 2;
		}
	}

	rxrpc_set_resend(call, resend, resend_at);
	_leave(" = 0");
	return 0;

protocol_error:
	_leave(" = -EPROTO");
	return -EPROTO;
}

/*
 * discard hard-ACK'd packets from the Tx window
 */
static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
{
	unsigned long _skb;
	int tail = call->acks_tail, old_tail;
	int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);

	_enter("{%u,%u},%u", call->acks_hard, win, hard);

	ASSERTCMP(hard - call->acks_hard, <=, win);

	while (call->acks_hard < hard) {
		smp_read_barrier_depends();
		_skb = call->acks_window[tail] & ~1;
		rxrpc_free_skb((struct sk_buff *) _skb);
		old_tail = tail;
		tail = (tail + 1) & (call->acks_winsz - 1);
		call->acks_tail = tail;
		if (call->acks_unacked == old_tail)
			call->acks_unacked = tail;
		call->acks_hard++;
	}

	wake_up(&call->tx_waitq);
}

/*
 * clear the Tx window in the event of a failure
 */
static void rxrpc_clear_tx_window(struct rxrpc_call *call)
{
	rxrpc_rotate_tx_window(call, atomic_read(&call->sequence));
}

/*
 * drain the out of sequence received packet queue into the packet Rx queue
 */
static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
{
	struct rxrpc_skb_priv *sp;
	struct sk_buff *skb;
	bool terminal;
	int ret;

	_enter("{%d,%d}", call->rx_data_post, call->rx_first_oos);

	spin_lock_bh(&call->lock);

	ret = -ECONNRESET;
	if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
		goto socket_unavailable;

	skb = skb_dequeue(&call->rx_oos_queue);
	if (skb) {
		sp = rxrpc_skb(skb);

		_debug("drain OOS packet %d [%d]",
		       ntohl(sp->hdr.seq), call->rx_first_oos);

		if (ntohl(sp->hdr.seq) != call->rx_first_oos) {
			skb_queue_head(&call->rx_oos_queue, skb);
			call->rx_first_oos = ntohl(rxrpc_skb(skb)->hdr.seq);
			_debug("requeue %p {%u}", skb, call->rx_first_oos);
		} else {
			skb->mark = RXRPC_SKB_MARK_DATA;
			terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
				!(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
			ret = rxrpc_queue_rcv_skb(call, skb, true, terminal);
			BUG_ON(ret < 0);
			_debug("drain #%u", call->rx_data_post);
			call->rx_data_post++;

			/* find out what the next packet is */
			skb = skb_peek(&call->rx_oos_queue);
			if (skb)
				call->rx_first_oos =
					ntohl(rxrpc_skb(skb)->hdr.seq);
			else
				call->rx_first_oos = 0;
			_debug("peek %p {%u}", skb, call->rx_first_oos);
		}
	}

	ret = 0;
socket_unavailable:
	spin_unlock_bh(&call->lock);
	_leave(" = %d", ret);
	return ret;
}

/*
 * insert an out of sequence packet into the buffer
 */
static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
				    struct sk_buff *skb)
{
	struct rxrpc_skb_priv *sp, *psp;
	struct sk_buff *p;
	u32 seq;

	sp = rxrpc_skb(skb);
	seq = ntohl(sp->hdr.seq);
	_enter(",,{%u}", seq);

	skb->destructor = rxrpc_packet_destructor;
	ASSERTCMP(sp->call, ==, NULL);
	sp->call = call;
	rxrpc_get_call(call);

	/* insert into the buffer in sequence order */
	spin_lock_bh(&call->lock);

	skb_queue_walk(&call->rx_oos_queue, p) {
		psp = rxrpc_skb(p);
		if (ntohl(psp->hdr.seq) > seq) {
			_debug("insert oos #%u before #%u",
			       seq, ntohl(psp->hdr.seq));
			skb_insert(p, skb, &call->rx_oos_queue);
			goto inserted;
		}
	}

	_debug("append oos #%u", seq);
	skb_queue_tail(&call->rx_oos_queue, skb);
inserted:

	/* we might now have a new front to the queue */
	if (call->rx_first_oos == 0 || seq < call->rx_first_oos)
		call->rx_first_oos = seq;

	read_lock(&call->state_lock);
	if (call->state < RXRPC_CALL_COMPLETE &&
	    call->rx_data_post == call->rx_first_oos) {
		_debug("drain rx oos now");
		set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
	}
	read_unlock(&call->state_lock);

	spin_unlock_bh(&call->lock);
	_leave(" [stored #%u]", call->rx_first_oos);
}

/*
 * clear the Tx window on final ACK reception
 */
static void rxrpc_zap_tx_window(struct rxrpc_call *call)
{
	struct rxrpc_skb_priv *sp;
	struct sk_buff *skb;
	unsigned long _skb, *acks_window;
	u8 winsz = call->acks_winsz;
	int tail;

	acks_window = call->acks_window;
	call->acks_window = NULL;

	while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) {
		tail = call->acks_tail;
		smp_read_barrier_depends();
		_skb = acks_window[tail] & ~1;
		smp_mb();
		call->acks_tail = (call->acks_tail + 1) & (winsz - 1);

		skb = (struct sk_buff *) _skb;
		sp = rxrpc_skb(skb);
		_debug("+++ clear Tx %u", ntohl(sp->hdr.seq));
		rxrpc_free_skb(skb);
	}

	kfree(acks_window);
}

/*
 * process the extra information that may be appended to an ACK packet
 */
static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
				  unsigned int latest, int nAcks)
{
	struct rxrpc_ackinfo ackinfo;
	struct rxrpc_peer *peer;
	unsigned int mtu;

	if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) {
		_leave(" [no ackinfo]");
		return;
	}

	_proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
	       latest,
	       ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU),
	       ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max));

	mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU));

	peer = call->conn->trans->peer;
	if (mtu < peer->maxdata) {
		spin_lock_bh(&peer->lock);
		peer->maxdata = mtu;
		peer->mtu = mtu + peer->hdrsize;
		spin_unlock_bh(&peer->lock);
		_net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
	}
}

/*
 * process packets in the reception queue
 */
static int rxrpc_process_rx_queue(struct rxrpc_call *call,
				  u32 *_abort_code)
{
	struct rxrpc_ackpacket ack;
	struct rxrpc_skb_priv *sp;
	struct sk_buff *skb;
	bool post_ACK;
	int latest;
	u32 hard, tx;

	_enter("");

process_further:
	skb = skb_dequeue(&call->rx_queue);
	if (!skb)
		return -EAGAIN;

	_net("deferred skb %p", skb);

	sp = rxrpc_skb(skb);

	_debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state);

	post_ACK = false;

	switch (sp->hdr.type) {
		/* data packets that wind up here have been received out of
		 * order, need security processing or are jumbo packets */
	case RXRPC_PACKET_TYPE_DATA:
		_proto("OOSQ DATA %%%u { #%u }",
		       ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));

		/* secured packets must be verified and possibly decrypted */
		if (rxrpc_verify_packet(call, skb, _abort_code) < 0)
			goto protocol_error;

		rxrpc_insert_oos_packet(call, skb);
		goto process_further;

		/* partial ACK to process */
	case RXRPC_PACKET_TYPE_ACK:
		if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) {
			_debug("extraction failure");
			goto protocol_error;
		}
		if (!skb_pull(skb, sizeof(ack)))
			BUG();

		latest = ntohl(sp->hdr.serial);
		hard = ntohl(ack.firstPacket);
		tx = atomic_read(&call->sequence);

		_proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
		       latest,
		       ntohs(ack.maxSkew),
		       hard,
		       ntohl(ack.previousPacket),
		       ntohl(ack.serial),
		       rxrpc_acks(ack.reason),
		       ack.nAcks);

		rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks);

		if (ack.reason == RXRPC_ACK_PING) {
			_proto("Rx ACK %%%u PING Request", latest);
			rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
					  sp->hdr.serial, true);
		}

		/* discard any out-of-order or duplicate ACKs */
		if (latest - call->acks_latest <= 0) {
			_debug("discard ACK %d <= %d",
			       latest, call->acks_latest);
			goto discard;
		}
		call->acks_latest = latest;

		if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
		    call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY &&
		    call->state != RXRPC_CALL_SERVER_SEND_REPLY &&
		    call->state != RXRPC_CALL_SERVER_AWAIT_ACK)
			goto discard;

		_debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state);

		if (hard > 0) {
			if (hard - 1 > tx) {
				_debug("hard-ACK'd packet %d not transmitted"
				       " (%d top)",
				       hard - 1, tx);
				goto protocol_error;
			}

			if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
			     call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
			    hard > tx)
				goto all_acked;

			smp_rmb();
			rxrpc_rotate_tx_window(call, hard - 1);
		}

		if (ack.nAcks > 0) {
			if (hard - 1 + ack.nAcks > tx) {
				_debug("soft-ACK'd packet %d+%d not"
				       " transmitted (%d top)",
				       hard - 1, ack.nAcks, tx);
				goto protocol_error;
			}

			if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0)
				goto protocol_error;
		}
		goto discard;

		/* complete ACK to process */
	case RXRPC_PACKET_TYPE_ACKALL:
		goto all_acked;

		/* abort and busy are handled elsewhere */
	case RXRPC_PACKET_TYPE_BUSY:
	case RXRPC_PACKET_TYPE_ABORT:
		BUG();

		/* connection level events - also handled elsewhere */
	case RXRPC_PACKET_TYPE_CHALLENGE:
	case RXRPC_PACKET_TYPE_RESPONSE:
	case RXRPC_PACKET_TYPE_DEBUG:
		BUG();
	}

	/* if we've had a hard ACK that covers all the packets we've sent, then
	 * that ends that phase of the operation */
all_acked:
	write_lock_bh(&call->state_lock);
	_debug("ack all %d", call->state);

	switch (call->state) {
	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
		call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
		break;
	case RXRPC_CALL_SERVER_AWAIT_ACK:
		_debug("srv complete");
		call->state = RXRPC_CALL_COMPLETE;
		post_ACK = true;
		break;
	case RXRPC_CALL_CLIENT_SEND_REQUEST:
	case RXRPC_CALL_SERVER_RECV_REQUEST:
		goto protocol_error_unlock; /* can't occur yet */
	default:
		write_unlock_bh(&call->state_lock);
		goto discard; /* assume packet left over from earlier phase */
	}

	write_unlock_bh(&call->state_lock);

	/* if all the packets we sent are hard-ACK'd, then we can discard
	 * whatever we've got left */
	_debug("clear Tx %d",
	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));

	del_timer_sync(&call->resend_timer);
	clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
	clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);

	if (call->acks_window)
		rxrpc_zap_tx_window(call);

	if (post_ACK) {
		/* post the final ACK message for userspace to pick up */
		_debug("post ACK");
		skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
		sp->call = call;
		rxrpc_get_call(call);
		spin_lock_bh(&call->lock);
		if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
			BUG();
		spin_unlock_bh(&call->lock);
		goto process_further;
	}

discard:
	rxrpc_free_skb(skb);
	goto process_further;

protocol_error_unlock:
	write_unlock_bh(&call->state_lock);
protocol_error:
	rxrpc_free_skb(skb);
	_leave(" = -EPROTO");
	return -EPROTO;
}

/*
 * post a message to the socket Rx queue for recvmsg() to pick up
 */
static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
			      bool fatal)
{
	struct rxrpc_skb_priv *sp;
	struct sk_buff *skb;
	int ret;

	_enter("{%d,%lx},%u,%u,%d",
	       call->debug_id, call->flags, mark, error, fatal);

	/* remove timers and things for fatal messages */
	if (fatal) {
		del_timer_sync(&call->resend_timer);
		del_timer_sync(&call->ack_timer);
		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
	}

	if (mark != RXRPC_SKB_MARK_NEW_CALL &&
	    !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
		_leave("[no userid]");
		return 0;
	}

	if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
		skb = alloc_skb(0, GFP_NOFS);
		if (!skb)
			return -ENOMEM;

		rxrpc_new_skb(skb);

		skb->mark = mark;

		sp = rxrpc_skb(skb);
		memset(sp, 0, sizeof(*sp));
		sp->error = error;
		sp->call = call;
		rxrpc_get_call(call);

		spin_lock_bh(&call->lock);
		ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
		spin_unlock_bh(&call->lock);
		BUG_ON(ret < 0);
	}

	return 0;
}

/*
 * handle background processing of incoming call packets and ACK / abort
 * generation
 */
void rxrpc_process_call(struct work_struct *work)
{
	struct rxrpc_call *call =
		container_of(work, struct rxrpc_call, processor);
	struct rxrpc_ackpacket ack;
	struct rxrpc_ackinfo ackinfo;
	struct rxrpc_header hdr;
	struct msghdr msg;
	struct kvec iov[5];
	unsigned long bits;
	__be32 data, pad;
	size_t len;
	int genbit, loop, nbit, ioc, ret, mtu;
	u32 abort_code = RX_PROTOCOL_ERROR;
	u8 *acks = NULL;

	//printk("\n--------------------\n");
	_enter("{%d,%s,%lx} [%lu]",
	       call->debug_id, rxrpc_call_states[call->state], call->events,
	       (jiffies - call->creation_jif) / (HZ / 10));

	if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) {
		_debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX");
		return;
	}

	/* there's a good chance we're going to have to send a message, so set
	 * one up in advance */
	msg.msg_name	= &call->conn->trans->peer->srx.transport.sin;
	msg.msg_namelen	= sizeof(call->conn->trans->peer->srx.transport.sin);
	msg.msg_control	= NULL;
	msg.msg_controllen = 0;
	msg.msg_flags	= 0;

	hdr.epoch	= call->conn->epoch;
	hdr.cid		= call->cid;
	hdr.callNumber	= call->call_id;
	hdr.seq		= 0;
	hdr.type	= RXRPC_PACKET_TYPE_ACK;
	hdr.flags	= call->conn->out_clientflag;
	hdr.userStatus	= 0;
	hdr.securityIndex = call->conn->security_ix;
	hdr._rsvd	= 0;
	hdr.serviceId	= call->conn->service_id;

	memset(iov, 0, sizeof(iov));
	iov[0].iov_base	= &hdr;
	iov[0].iov_len	= sizeof(hdr);

	/* deal with events of a final nature */
	if (test_bit(RXRPC_CALL_RELEASE, &call->events)) {
		rxrpc_release_call(call);
		clear_bit(RXRPC_CALL_RELEASE, &call->events);
	}

	if (test_bit(RXRPC_CALL_RCVD_ERROR, &call->events)) {
		int error;

		clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
		clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
		clear_bit(RXRPC_CALL_ABORT, &call->events);

		error = call->conn->trans->peer->net_error;
		_debug("post net error %d", error);

		if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR,
				       error, true) < 0)
			goto no_mem;
		clear_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
		goto kill_ACKs;
	}

	if (test_bit(RXRPC_CALL_CONN_ABORT, &call->events)) {
		ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);

		clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
		clear_bit(RXRPC_CALL_ABORT, &call->events);

		_debug("post conn abort");

		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
				       call->conn->error, true) < 0)
			goto no_mem;
		clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
		goto kill_ACKs;
	}

	if (test_bit(RXRPC_CALL_REJECT_BUSY, &call->events)) {
		hdr.type = RXRPC_PACKET_TYPE_BUSY;
		genbit = RXRPC_CALL_REJECT_BUSY;
		goto send_message;
	}

	if (test_bit(RXRPC_CALL_ABORT, &call->events)) {
		ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);

		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
				       ECONNABORTED, true) < 0)
			goto no_mem;
		hdr.type = RXRPC_PACKET_TYPE_ABORT;
		data = htonl(call->abort_code);
		iov[1].iov_base = &data;
		iov[1].iov_len = sizeof(data);
		genbit = RXRPC_CALL_ABORT;
		goto send_message;
	}

	if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) {
		genbit = RXRPC_CALL_ACK_FINAL;

		ack.bufferSpace	= htons(8);
		ack.maxSkew	= 0;
		ack.serial	= 0;
		ack.reason	= RXRPC_ACK_IDLE;
		ack.nAcks	= 0;
		call->ackr_reason = 0;

		spin_lock_bh(&call->lock);
		ack.serial = call->ackr_serial;
		ack.previousPacket = call->ackr_prev_seq;
		ack.firstPacket = htonl(call->rx_data_eaten + 1);
		spin_unlock_bh(&call->lock);

		pad = 0;

		iov[1].iov_base = &ack;
		iov[1].iov_len	= sizeof(ack);
		iov[2].iov_base = &pad;
		iov[2].iov_len	= 3;
		iov[3].iov_base = &ackinfo;
		iov[3].iov_len	= sizeof(ackinfo);
		goto send_ACK;
	}

	if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) |
			    (1 << RXRPC_CALL_RCVD_ABORT))
	    ) {
		u32 mark;

		if (test_bit(RXRPC_CALL_RCVD_ABORT, &call->events))
			mark = RXRPC_SKB_MARK_REMOTE_ABORT;
		else
			mark = RXRPC_SKB_MARK_BUSY;

		_debug("post abort/busy");
		rxrpc_clear_tx_window(call);
		if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0)
			goto no_mem;

		clear_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
		clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
		goto kill_ACKs;
	}

	if (test_and_clear_bit(RXRPC_CALL_RCVD_ACKALL, &call->events)) {
		_debug("do implicit ackall");
		rxrpc_clear_tx_window(call);
	}

	if (test_bit(RXRPC_CALL_LIFE_TIMER, &call->events)) {
		write_lock_bh(&call->state_lock);
		if (call->state <= RXRPC_CALL_COMPLETE) {
			call->state = RXRPC_CALL_LOCALLY_ABORTED;
			call->abort_code = RX_CALL_TIMEOUT;
			set_bit(RXRPC_CALL_ABORT, &call->events);
		}
		write_unlock_bh(&call->state_lock);

		_debug("post timeout");
		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
				       ETIME, true) < 0)
			goto no_mem;

		clear_bit(RXRPC_CALL_LIFE_TIMER, &call->events);
		goto kill_ACKs;
	}

	/* deal with assorted inbound messages */
	if (!skb_queue_empty(&call->rx_queue)) {
		switch (rxrpc_process_rx_queue(call, &abort_code)) {
		case 0:
		case -EAGAIN:
			break;
		case -ENOMEM:
			goto no_mem;
		case -EKEYEXPIRED:
		case -EKEYREJECTED:
		case -EPROTO:
			rxrpc_abort_call(call, abort_code);
			goto kill_ACKs;
		}
	}

	/* handle resending */
	if (test_and_clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
		rxrpc_resend_timer(call);
	if (test_and_clear_bit(RXRPC_CALL_RESEND, &call->events))
		rxrpc_resend(call);

	/* consider sending an ordinary ACK */
	if (test_bit(RXRPC_CALL_ACK, &call->events)) {
		_debug("send ACK: window: %d - %d { %lx }",
		       call->rx_data_eaten, call->ackr_win_top,
		       call->ackr_window[0]);

		if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST &&
		    call->ackr_reason != RXRPC_ACK_PING_RESPONSE) {
			/* ACK by sending reply DATA packet in this state */
			clear_bit(RXRPC_CALL_ACK, &call->events);
			goto maybe_reschedule;
		}

		genbit = RXRPC_CALL_ACK;

		acks = kzalloc(call->ackr_win_top - call->rx_data_eaten,
			       GFP_NOFS);
		if (!acks)
			goto no_mem;

		//hdr.flags	= RXRPC_SLOW_START_OK;
		ack.bufferSpace	= htons(8);
		ack.maxSkew	= 0;
		ack.serial	= 0;
		ack.reason	= 0;

		spin_lock_bh(&call->lock);
		ack.reason = call->ackr_reason;
		ack.serial = call->ackr_serial;
		ack.previousPacket = call->ackr_prev_seq;
		ack.firstPacket = htonl(call->rx_data_eaten + 1);

		ack.nAcks = 0;
		for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
			nbit = loop * BITS_PER_LONG;
			for (bits = call->ackr_window[loop]; bits; bits >>= 1
			     ) {
				_debug("- l=%d n=%d b=%lx", loop, nbit, bits);
				if (bits & 1) {
					acks[nbit] = RXRPC_ACK_TYPE_ACK;
					ack.nAcks = nbit + 1;
				}
				nbit++;
			}
		}
		call->ackr_reason = 0;
		spin_unlock_bh(&call->lock);

		pad = 0;

		iov[1].iov_base = &ack;
		iov[1].iov_len	= sizeof(ack);
		iov[2].iov_base = acks;
		iov[2].iov_len	= ack.nAcks;
		iov[3].iov_base = &pad;
		iov[3].iov_len	= 3;
		iov[4].iov_base = &ackinfo;
		iov[4].iov_len	= sizeof(ackinfo);

		switch (ack.reason) {
		case RXRPC_ACK_REQUESTED:
		case RXRPC_ACK_DUPLICATE:
		case RXRPC_ACK_OUT_OF_SEQUENCE:
		case RXRPC_ACK_EXCEEDS_WINDOW:
		case RXRPC_ACK_NOSPACE:
		case RXRPC_ACK_PING:
		case RXRPC_ACK_PING_RESPONSE:
			goto send_ACK_with_skew;
		case RXRPC_ACK_DELAY:
		case RXRPC_ACK_IDLE:
			goto send_ACK;
		}
	}

	/* handle completion of security negotiations on an incoming
	 * connection */
	if (test_and_clear_bit(RXRPC_CALL_SECURED, &call->events)) {
		_debug("secured");
		spin_lock_bh(&call->lock);

		if (call->state == RXRPC_CALL_SERVER_SECURING) {
			_debug("securing");
			write_lock(&call->conn->lock);
			if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
			    !test_bit(RXRPC_CALL_RELEASE, &call->events)) {
				_debug("not released");
				call->state = RXRPC_CALL_SERVER_ACCEPTING;
				list_move_tail(&call->accept_link,
					       &call->socket->acceptq);
			}
			write_unlock(&call->conn->lock);
			read_lock(&call->state_lock);
			if (call->state < RXRPC_CALL_COMPLETE)
				set_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
			read_unlock(&call->state_lock);
		}

		spin_unlock_bh(&call->lock);
		if (!test_bit(RXRPC_CALL_POST_ACCEPT, &call->events))
			goto maybe_reschedule;
	}

	/* post a notification of an acceptable connection to the app */
	if (test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) {
		_debug("post accept");
		if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL,
				       0, false) < 0)
			goto no_mem;
		clear_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
		goto maybe_reschedule;
	}

	/* handle incoming call acceptance */
	if (test_and_clear_bit(RXRPC_CALL_ACCEPTED, &call->events)) {
		_debug("accepted");
		ASSERTCMP(call->rx_data_post, ==, 0);
		call->rx_data_post = 1;
		read_lock_bh(&call->state_lock);
		if (call->state < RXRPC_CALL_COMPLETE)
			set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
		read_unlock_bh(&call->state_lock);
	}

	/* drain the out of sequence received packet queue into the packet Rx
	 * queue */
	if (test_and_clear_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events)) {
		while (call->rx_data_post == call->rx_first_oos)
			if (rxrpc_drain_rx_oos_queue(call) < 0)
				break;
		goto maybe_reschedule;
	}

	/* other events may have been raised since we started checking */
	goto maybe_reschedule;

send_ACK_with_skew:
	ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
			    ntohl(ack.serial));
send_ACK:
	mtu = call->conn->trans->peer->if_mtu;
	mtu -= call->conn->trans->peer->hdrsize;
	ackinfo.maxMTU	= htonl(mtu);
	ackinfo.rwind	= htonl(rxrpc_rx_window_size);

	/* permit the peer to send us jumbo packets if it wants to */
	ackinfo.rxMTU	= htonl(rxrpc_rx_mtu);
	ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max);

	hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
	_proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
	       ntohl(hdr.serial),
	       ntohs(ack.maxSkew),
	       ntohl(ack.firstPacket),
	       ntohl(ack.previousPacket),
	       ntohl(ack.serial),
	       rxrpc_acks(ack.reason),
	       ack.nAcks);

	del_timer_sync(&call->ack_timer);
	if (ack.nAcks > 0)
		set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags);
	goto send_message_2;

send_message:
	_debug("send message");

	hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
	_proto("Tx %s %%%u", rxrpc_pkts[hdr.type], ntohl(hdr.serial));
send_message_2:

	len = iov[0].iov_len;
	ioc = 1;
	if (iov[4].iov_len) {
		ioc = 5;
		len += iov[4].iov_len;
		len += iov[3].iov_len;
		len += iov[2].iov_len;
		len += iov[1].iov_len;
	} else if (iov[3].iov_len) {
		ioc = 4;
		len += iov[3].iov_len;
		len += iov[2].iov_len;
		len += iov[1].iov_len;
	} else if (iov[2].iov_len) {
		ioc = 3;
		len += iov[2].iov_len;
		len += iov[1].iov_len;
	} else if (iov[1].iov_len) {
		ioc = 2;
		len += iov[1].iov_len;
	}

	ret = kernel_sendmsg(call->conn->trans->local->socket,
			     &msg, iov, ioc, len);
	if (ret < 0) {
		_debug("sendmsg failed: %d", ret);
		read_lock_bh(&call->state_lock);
		if (call->state < RXRPC_CALL_DEAD)
			rxrpc_queue_call(call);
		read_unlock_bh(&call->state_lock);
		goto error;
	}

	switch (genbit) {
	case RXRPC_CALL_ABORT:
		clear_bit(genbit, &call->events);
		clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
		goto kill_ACKs;

	case RXRPC_CALL_ACK_FINAL:
		write_lock_bh(&call->state_lock);
		if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK)
			call->state = RXRPC_CALL_COMPLETE;
		write_unlock_bh(&call->state_lock);
		goto kill_ACKs;

	default:
		clear_bit(genbit, &call->events);
		switch (call->state) {
		case RXRPC_CALL_CLIENT_AWAIT_REPLY:
		case RXRPC_CALL_CLIENT_RECV_REPLY:
		case RXRPC_CALL_SERVER_RECV_REQUEST:
		case RXRPC_CALL_SERVER_ACK_REQUEST:
			_debug("start ACK timer");
			rxrpc_propose_ACK(call, RXRPC_ACK_DELAY,
					  call->ackr_serial, false);
		default:
			break;
		}
		goto maybe_reschedule;
	}

kill_ACKs:
	del_timer_sync(&call->ack_timer);
	if (test_and_clear_bit(RXRPC_CALL_ACK_FINAL, &call->events))
		rxrpc_put_call(call);
	clear_bit(RXRPC_CALL_ACK, &call->events);

maybe_reschedule:
	if (call->events || !skb_queue_empty(&call->rx_queue)) {
		read_lock_bh(&call->state_lock);
		if (call->state < RXRPC_CALL_DEAD)
			rxrpc_queue_call(call);
		read_unlock_bh(&call->state_lock);
	}

	/* don't leave aborted connections on the accept queue */
	if (call->state >= RXRPC_CALL_COMPLETE &&
	    !list_empty(&call->accept_link)) {
		_debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
		       call, call->events, call->flags,
		       ntohl(call->conn->cid));

		read_lock_bh(&call->state_lock);
		if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
		    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
			rxrpc_queue_call(call);
		read_unlock_bh(&call->state_lock);
	}

error:
	clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags);
	kfree(acks);

	/* because we don't want two CPUs both processing the work item for one
	 * call at the same time, we use a flag to note when it's busy; however
	 * this means there's a race between clearing the flag and setting the
	 * work pending bit and the work item being processed again */
	if (call->events && !work_pending(&call->processor)) {
		_debug("jumpstart %x", ntohl(call->conn->cid));
		rxrpc_queue_call(call);
	}

	_leave("");
	return;

no_mem:
	_debug("out of memory");
	goto maybe_reschedule;
}