2
0
mirror of https://github.com/xcat2/xNBA.git synced 2024-11-22 17:41:55 +00:00

[tcp] Add support for TCP window scaling

The maximum unscaled TCP window (64kB) implies a maximum bandwidth of
around 300kB/s on a WAN link with an RTT of 200ms.  Add support for
the TCP window scaling option to remove this upper limit.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
This commit is contained in:
Michael Brown 2012-06-29 14:30:18 +01:00
parent 76d9c1a001
commit ea61075c60
2 changed files with 58 additions and 3 deletions

View File

@ -54,6 +54,31 @@ struct tcp_mss_option {
/** Code for the TCP MSS option */
#define TCP_OPTION_MSS 2
/** TCP window scale option
*
* Layout of the window scale option as it appears within the TCP
* options: three single-byte fields, packed so that no padding is
* inserted.
*/
struct tcp_window_scale_option {
/** Option kind; set to TCP_OPTION_WS when sending */
uint8_t kind;
/** Option length; set to the size of this structure when sending */
uint8_t length;
/** Window scale, applied as a left-shift count to the 16-bit window */
uint8_t scale;
} __attribute__ (( packed ));
/** Padded TCP window scale option (used for sending)
*
* A single leading NOP byte followed by the three-byte window scale
* option, so that the pair occupies four bytes when transmitted.
*/
struct tcp_window_scale_padded_option {
/** Padding byte; set to TCP_OPTION_NOP when sending */
uint8_t nop;
/** The window scale option itself */
struct tcp_window_scale_option wsopt;
} __attribute__ (( packed ));
/** Code for the TCP window scale option
*
* Written into the "kind" byte of a transmitted window scale option,
* and used as a case label when parsing received options.
*/
#define TCP_OPTION_WS 3
/** Advertised TCP window scale
*
* Using a scale factor of 2**9 provides for a maximum window of 32MB,
* which is sufficient to allow Gigabit-speed transfers with a 200ms
* RTT. The minimum advertised window is 512 bytes, which is still
* less than a single packet.
*
* This value is placed in the window scale option that is sent
* alongside the MSS option, and becomes the connection's receive
* window scale once a received SYN carries a window scale option.
*/
#define TCP_RX_WINDOW_SCALE 9
/** TCP timestamp option */
struct tcp_timestamp_option {
uint8_t kind;
@ -75,7 +100,9 @@ struct tcp_timestamp_padded_option {
struct tcp_options {
/** MSS option, if present */
const struct tcp_mss_option *mssopt;
/** Window scale option, if present
*
* Points at the option within the received option data; checked
* for being non-NULL before its scale field is read.
*/
const struct tcp_window_scale_option *wsopt;
/** Timestamp option, if present */
const struct tcp_timestamp_option *tsopt;
};
@ -316,6 +343,7 @@ struct tcp_options {
( MAX_LL_NET_HEADER_LEN + \
sizeof ( struct tcp_header ) + \
sizeof ( struct tcp_mss_option ) + \
sizeof ( struct tcp_window_scale_padded_option ) + \
sizeof ( struct tcp_timestamp_padded_option ) )
/**

View File

@ -87,6 +87,16 @@ struct tcp_connection {
* Equivalent to TS.Recent in RFC 1323 terminology.
*/
uint32_t ts_recent;
/** Send window scale
*
* Equivalent to Snd.Wind.Scale in RFC 1323 terminology
*/
uint8_t snd_win_scale;
/** Receive window scale
*
* Equivalent to Rcv.Wind.Scale in RFC 1323 terminology
*/
uint8_t rcv_win_scale;
/** Transmit queue */
struct list_head tx_queue;
@ -490,6 +500,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
struct io_buffer *iobuf;
struct tcp_header *tcphdr;
struct tcp_mss_option *mssopt;
struct tcp_window_scale_padded_option *wsopt;
struct tcp_timestamp_padded_option *tsopt;
void *payload;
unsigned int flags;
@ -497,6 +508,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
uint32_t seq_len;
uint32_t app_win;
uint32_t max_rcv_win;
uint32_t max_representable_win;
int rc;
/* If retransmission timer is already running, do nothing */
@ -551,6 +563,9 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
app_win = xfer_window ( &tcp->xfer );
if ( max_rcv_win > app_win )
max_rcv_win = app_win;
max_representable_win = ( 0xffff << tcp->rcv_win_scale );
if ( max_rcv_win > max_representable_win )
max_rcv_win = max_representable_win;
max_rcv_win &= ~0x03; /* Keep everything dword-aligned */
if ( tcp->rcv_win < max_rcv_win )
tcp->rcv_win = max_rcv_win;
@ -562,6 +577,11 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
mssopt->kind = TCP_OPTION_MSS;
mssopt->length = sizeof ( *mssopt );
mssopt->mss = htons ( TCP_MSS );
wsopt = iob_push ( iobuf, sizeof ( *wsopt ) );
wsopt->nop = TCP_OPTION_NOP;
wsopt->wsopt.kind = TCP_OPTION_WS;
wsopt->wsopt.length = sizeof ( wsopt->wsopt );
wsopt->wsopt.scale = TCP_RX_WINDOW_SCALE;
}
if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) {
tsopt = iob_push ( iobuf, sizeof ( *tsopt ) );
@ -581,7 +601,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
tcphdr->ack = htonl ( tcp->rcv_ack );
tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
tcphdr->flags = flags;
tcphdr->win = htons ( tcp->rcv_win );
tcphdr->win = htons ( tcp->rcv_win >> tcp->rcv_win_scale );
tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
/* Dump header */
@ -769,6 +789,9 @@ static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data,
case TCP_OPTION_MSS:
options->mssopt = data;
break;
case TCP_OPTION_WS:
options->wsopt = data;
break;
case TCP_OPTION_TS:
options->tsopt = data;
break;
@ -825,6 +848,10 @@ static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
tcp->rcv_ack = seq;
if ( options->tsopt )
tcp->flags |= TCP_TS_ENABLED;
if ( options->wsopt ) {
tcp->snd_win_scale = options->wsopt->scale;
tcp->rcv_win_scale = TCP_RX_WINDOW_SCALE;
}
}
/* Ignore duplicate SYN */
@ -1168,7 +1195,7 @@ static int tcp_rx ( struct io_buffer *iobuf,
tcp = tcp_demux ( ntohs ( tcphdr->dest ) );
seq = ntohl ( tcphdr->seq );
ack = ntohl ( tcphdr->ack );
win = ntohs ( tcphdr->win );
win = ( ntohs ( tcphdr->win ) << tcp->snd_win_scale );
flags = tcphdr->flags;
tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ),
( hlen - sizeof ( *tcphdr ) ), &options );