From 399756e2038b80d9c4b3eeb2db4be782e1006fc4 Mon Sep 17 00:00:00 2001 From: jbjohnso Date: Fri, 12 Nov 2010 15:57:37 +0000 Subject: [PATCH] -Add ipxe variants of most patches (ignoring hdboot and old kvm workaround for now) Former-commit-id: fa6396d64cbcee40b1306ac3aa17f99267706d30 --- gpxe/ipxe-branding.patch | 14 + gpxe/ipxe-cmdlinesize.patch | 11 + gpxe/ipxe-config.patch | 78 ++ gpxe/ipxe-droppackets.patch | 1789 ++++++++++++++++++++++++++++++++ gpxe/ipxe-expandfilename.patch | 33 + gpxe/ipxe-machyp.patch | 141 +++ gpxe/ipxe-undinetchange.patch | 16 + gpxe/ipxe-xnbaclass.patch | 12 + 8 files changed, 2094 insertions(+) create mode 100644 gpxe/ipxe-branding.patch create mode 100644 gpxe/ipxe-cmdlinesize.patch create mode 100644 gpxe/ipxe-config.patch create mode 100644 gpxe/ipxe-droppackets.patch create mode 100644 gpxe/ipxe-expandfilename.patch create mode 100644 gpxe/ipxe-machyp.patch create mode 100644 gpxe/ipxe-undinetchange.patch create mode 100644 gpxe/ipxe-xnbaclass.patch diff --git a/gpxe/ipxe-branding.patch b/gpxe/ipxe-branding.patch new file mode 100644 index 0000000..ecc2afb --- /dev/null +++ b/gpxe/ipxe-branding.patch @@ -0,0 +1,14 @@ +diff -urN ipxe/src/config/general.h ipxe-branded/src/config/general.h +--- ipxe/src/config/general.h 2010-11-11 14:09:48.052334980 -0500 ++++ ipxe-branded/src/config/general.h 2010-11-12 09:33:37.493584218 -0500 +@@ -24,8 +24,8 @@ + * "iPXE". 
+ * + */ +-#define PRODUCT_NAME "" +-#define PRODUCT_SHORT_NAME "iPXE" ++#define PRODUCT_NAME "xCAT Network Boot Agent" ++#define PRODUCT_SHORT_NAME "xNBA" + + /* + * Timer configuration diff --git a/gpxe/ipxe-cmdlinesize.patch b/gpxe/ipxe-cmdlinesize.patch new file mode 100644 index 0000000..55d1f10 --- /dev/null +++ b/gpxe/ipxe-cmdlinesize.patch @@ -0,0 +1,11 @@ +diff -urN ipxe-expandfilename/src/arch/i386/include/bzimage.h ipxe-cmdlinesize/src/arch/i386/include/bzimage.h +--- ipxe-expandfilename/src/arch/i386/include/bzimage.h 2010-11-11 14:09:48.042334268 -0500 ++++ ipxe-cmdlinesize/src/arch/i386/include/bzimage.h 2010-11-12 10:47:08.086084309 -0500 +@@ -137,6 +137,6 @@ + #define BZI_STACK_SIZE 0x1000 + + /** Maximum size of command line */ +-#define BZI_CMDLINE_SIZE 0x100 ++#define BZI_CMDLINE_SIZE 0x7FF + + #endif /* _BZIMAGE_H */ diff --git a/gpxe/ipxe-config.patch b/gpxe/ipxe-config.patch new file mode 100644 index 0000000..f5c9d6d --- /dev/null +++ b/gpxe/ipxe-config.patch @@ -0,0 +1,78 @@ +diff -urN ipxe-branded/src/config/general.h ipxe-configged/src/config/general.h +--- ipxe-branded/src/config/general.h 2010-11-12 09:33:37.493584218 -0500 ++++ ipxe-configged/src/config/general.h 2010-11-12 09:50:59.112334015 -0500 +@@ -40,7 +40,7 @@ + */ + + #define NET_PROTO_IPV4 /* IPv4 protocol */ +-#undef NET_PROTO_FCOE /* Fibre Channel over Ethernet protocol */ ++#define NET_PROTO_FCOE /* Fibre Channel over Ethernet protocol */ + + /* + * PXE support +@@ -57,7 +57,7 @@ + #define DOWNLOAD_PROTO_TFTP /* Trivial File Transfer Protocol */ + #define DOWNLOAD_PROTO_HTTP /* Hypertext Transfer Protocol */ + #undef DOWNLOAD_PROTO_HTTPS /* Secure Hypertext Transfer Protocol */ +-#undef DOWNLOAD_PROTO_FTP /* File Transfer Protocol */ ++#define DOWNLOAD_PROTO_FTP /* File Transfer Protocol */ + #undef DOWNLOAD_PROTO_TFTM /* Multicast Trivial File Transfer Protocol */ + #undef DOWNLOAD_PROTO_SLAM /* Scalable Local Area Multicast */ + +@@ -66,18 +66,18 @@ + * + */ + +-//#undef 
SANBOOT_PROTO_ISCSI /* iSCSI protocol */ +-//#undef SANBOOT_PROTO_AOE /* AoE protocol */ +-//#undef SANBOOT_PROTO_IB_SRP /* Infiniband SCSI RDMA protocol */ +-//#undef SANBOOT_PROTO_FCP /* Fibre Channel protocol */ ++#define SANBOOT_PROTO_ISCSI /* iSCSI protocol */ ++#define SANBOOT_PROTO_AOE /* AoE protocol */ ++#define SANBOOT_PROTO_IB_SRP /* Infiniband SCSI RDMA protocol */ ++#define SANBOOT_PROTO_FCP /* Fibre Channel protocol */ + + /* + * 802.11 cryptosystems and handshaking protocols + * + */ +-#define CRYPTO_80211_WEP /* WEP encryption (deprecated and insecure!) */ +-#define CRYPTO_80211_WPA /* WPA Personal, authenticating with passphrase */ +-#define CRYPTO_80211_WPA2 /* Add support for stronger WPA cryptography */ ++#undef CRYPTO_80211_WEP /* WEP encryption (deprecated and insecure!) */ ++#undef CRYPTO_80211_WPA /* WPA Personal, authenticating with passphrase */ ++#undef CRYPTO_80211_WPA2 /* Add support for stronger WPA cryptography */ + + /* + * Name resolution modules +@@ -99,9 +99,9 @@ + //#define IMAGE_MULTIBOOT /* MultiBoot image support */ + //#define IMAGE_AOUT /* a.out image support */ + //#define IMAGE_WINCE /* WinCE image support */ +-//#define IMAGE_PXE /* PXE image support */ +-//#define IMAGE_SCRIPT /* iPXE script image support */ +-//#define IMAGE_BZIMAGE /* Linux bzImage image support */ ++#define IMAGE_PXE /* PXE image support */ ++#define IMAGE_SCRIPT /* iPXE script image support */ ++#define IMAGE_BZIMAGE /* Linux bzImage image support */ + //#define IMAGE_COMBOOT /* SYSLINUX COMBOOT image support */ + //#define IMAGE_EFI /* EFI image support */ + +@@ -113,7 +113,7 @@ + #define NVO_CMD /* Non-volatile option storage commands */ + #define CONFIG_CMD /* Option configuration console */ + #define IFMGMT_CMD /* Interface management commands */ +-#define IWMGMT_CMD /* Wireless interface management commands */ ++#undef IWMGMT_CMD /* Wireless interface management commands */ + #define FCMGMT_CMD /* Fibre Channel management commands */ + #define 
ROUTE_CMD /* Routing table management commands */ + #define IMAGE_CMD /* Image management commands */ +@@ -123,7 +123,7 @@ + #undef TIME_CMD /* Time commands */ + #undef DIGEST_CMD /* Image crypto digest commands */ + #undef LOTEST_CMD /* Loopback testing commands */ +-//#undef PXE_CMD /* PXE commands */ ++#undef PXE_CMD /* PXE commands */ + + /* + * Error message tables to include diff --git a/gpxe/ipxe-droppackets.patch b/gpxe/ipxe-droppackets.patch new file mode 100644 index 0000000..d4d02d7 --- /dev/null +++ b/gpxe/ipxe-droppackets.patch @@ -0,0 +1,1789 @@ +diff -urN ipxe-configged/src/net/arp.c ipxe-droppackets/src/net/arp.c +--- ipxe-configged/src/net/arp.c 2010-11-11 14:09:48.132334810 -0500 ++++ ipxe-droppackets/src/net/arp.c 2010-11-12 10:33:14.482334325 -0500 +@@ -234,7 +234,7 @@ + goto done; + + /* Create new ARP table entry if necessary */ +- if ( ! merge ) { ++ if ( ( arphdr->ar_op == htons ( ARPOP_REPLY ) ) && ( ! merge ) ) { + arp = &arp_table[next_new_arp_entry++ % NUM_ARP_ENTRIES]; + arp->ll_protocol = ll_protocol; + arp->net_protocol = net_protocol; +diff -urN ipxe-configged/src/net/arp.c.orig ipxe-droppackets/src/net/arp.c.orig +--- ipxe-configged/src/net/arp.c.orig 1969-12-31 19:00:00.000000000 -0500 ++++ ipxe-droppackets/src/net/arp.c.orig 2010-11-11 14:09:48.132334810 -0500 +@@ -0,0 +1,290 @@ ++/* ++ * Copyright (C) 2006 Michael Brown . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++FILE_LICENCE ( GPL2_OR_LATER ); ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** @file ++ * ++ * Address Resolution Protocol ++ * ++ * This file implements the address resolution protocol as defined in ++ * RFC826. The implementation is media-independent and ++ * protocol-independent; it is not limited to Ethernet or to IPv4. ++ * ++ */ ++ ++/** An ARP cache entry */ ++struct arp_entry { ++ /** Network-layer protocol */ ++ struct net_protocol *net_protocol; ++ /** Link-layer protocol */ ++ struct ll_protocol *ll_protocol; ++ /** Network-layer address */ ++ uint8_t net_addr[MAX_NET_ADDR_LEN]; ++ /** Link-layer address */ ++ uint8_t ll_addr[MAX_LL_ADDR_LEN]; ++}; ++ ++/** Number of entries in the ARP cache ++ * ++ * This is a global cache, covering all network interfaces, ++ * network-layer protocols and link-layer protocols. 
++ */ ++#define NUM_ARP_ENTRIES 4 ++ ++/** The ARP cache */ ++static struct arp_entry arp_table[NUM_ARP_ENTRIES]; ++#define arp_table_end &arp_table[NUM_ARP_ENTRIES] ++ ++static unsigned int next_new_arp_entry = 0; ++ ++struct net_protocol arp_protocol __net_protocol; ++ ++/** ++ * Find entry in the ARP cache ++ * ++ * @v ll_protocol Link-layer protocol ++ * @v net_protocol Network-layer protocol ++ * @v net_addr Network-layer address ++ * @ret arp ARP cache entry, or NULL if not found ++ * ++ */ ++static struct arp_entry * ++arp_find_entry ( struct ll_protocol *ll_protocol, ++ struct net_protocol *net_protocol, ++ const void *net_addr ) { ++ struct arp_entry *arp; ++ ++ for ( arp = arp_table ; arp < arp_table_end ; arp++ ) { ++ if ( ( arp->ll_protocol == ll_protocol ) && ++ ( arp->net_protocol == net_protocol ) && ++ ( memcmp ( arp->net_addr, net_addr, ++ net_protocol->net_addr_len ) == 0 ) ) ++ return arp; ++ } ++ return NULL; ++} ++ ++/** ++ * Look up media-specific link-layer address in the ARP cache ++ * ++ * @v netdev Network device ++ * @v net_protocol Network-layer protocol ++ * @v dest_net_addr Destination network-layer address ++ * @v source_net_addr Source network-layer address ++ * @ret dest_ll_addr Destination link layer address ++ * @ret rc Return status code ++ * ++ * This function will use the ARP cache to look up the link-layer ++ * address for the link-layer protocol associated with the network ++ * device and the given network-layer protocol and addresses. If ++ * found, the destination link-layer address will be filled in in @c ++ * dest_ll_addr. ++ * ++ * If no address is found in the ARP cache, an ARP request will be ++ * transmitted on the specified network device and -ENOENT will be ++ * returned. 
++ */ ++int arp_resolve ( struct net_device *netdev, struct net_protocol *net_protocol, ++ const void *dest_net_addr, const void *source_net_addr, ++ void *dest_ll_addr ) { ++ struct ll_protocol *ll_protocol = netdev->ll_protocol; ++ const struct arp_entry *arp; ++ struct io_buffer *iobuf; ++ struct arphdr *arphdr; ++ int rc; ++ ++ /* Look for existing entry in ARP table */ ++ arp = arp_find_entry ( ll_protocol, net_protocol, dest_net_addr ); ++ if ( arp ) { ++ DBG ( "ARP cache hit: %s %s => %s %s\n", ++ net_protocol->name, net_protocol->ntoa ( arp->net_addr ), ++ ll_protocol->name, ll_protocol->ntoa ( arp->ll_addr ) ); ++ memcpy ( dest_ll_addr, arp->ll_addr, ll_protocol->ll_addr_len); ++ return 0; ++ } ++ DBG ( "ARP cache miss: %s %s\n", net_protocol->name, ++ net_protocol->ntoa ( dest_net_addr ) ); ++ ++ /* Allocate ARP packet */ ++ iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( *arphdr ) + ++ 2 * ( MAX_LL_ADDR_LEN + MAX_NET_ADDR_LEN ) ); ++ if ( ! iobuf ) ++ return -ENOMEM; ++ iob_reserve ( iobuf, MAX_LL_HEADER_LEN ); ++ ++ /* Build up ARP request */ ++ arphdr = iob_put ( iobuf, sizeof ( *arphdr ) ); ++ arphdr->ar_hrd = ll_protocol->ll_proto; ++ arphdr->ar_hln = ll_protocol->ll_addr_len; ++ arphdr->ar_pro = net_protocol->net_proto; ++ arphdr->ar_pln = net_protocol->net_addr_len; ++ arphdr->ar_op = htons ( ARPOP_REQUEST ); ++ memcpy ( iob_put ( iobuf, ll_protocol->ll_addr_len ), ++ netdev->ll_addr, ll_protocol->ll_addr_len ); ++ memcpy ( iob_put ( iobuf, net_protocol->net_addr_len ), ++ source_net_addr, net_protocol->net_addr_len ); ++ memset ( iob_put ( iobuf, ll_protocol->ll_addr_len ), ++ 0, ll_protocol->ll_addr_len ); ++ memcpy ( iob_put ( iobuf, net_protocol->net_addr_len ), ++ dest_net_addr, net_protocol->net_addr_len ); ++ ++ /* Transmit ARP request */ ++ if ( ( rc = net_tx ( iobuf, netdev, &arp_protocol, ++ netdev->ll_broadcast, netdev->ll_addr ) ) != 0 ) ++ return rc; ++ ++ return -ENOENT; ++} ++ ++/** ++ * Identify ARP protocol ++ * ++ * @v net_proto 
Network-layer protocol, in network-endian order ++ * @ret arp_net_protocol ARP protocol, or NULL ++ * ++ */ ++static struct arp_net_protocol * arp_find_protocol ( uint16_t net_proto ) { ++ struct arp_net_protocol *arp_net_protocol; ++ ++ for_each_table_entry ( arp_net_protocol, ARP_NET_PROTOCOLS ) { ++ if ( arp_net_protocol->net_protocol->net_proto == net_proto ) { ++ return arp_net_protocol; ++ } ++ } ++ return NULL; ++} ++ ++/** ++ * Process incoming ARP packets ++ * ++ * @v iobuf I/O buffer ++ * @v netdev Network device ++ * @v ll_source Link-layer source address ++ * @ret rc Return status code ++ * ++ * This handles ARP requests and responses as detailed in RFC826. The ++ * method detailed within the RFC is pretty optimised, handling ++ * requests and responses with basically a single code path and ++ * avoiding the need for extraneous ARP requests; read the RFC for ++ * details. ++ */ ++static int arp_rx ( struct io_buffer *iobuf, struct net_device *netdev, ++ const void *ll_dest __unused, ++ const void *ll_source __unused ) { ++ struct arphdr *arphdr = iobuf->data; ++ struct arp_net_protocol *arp_net_protocol; ++ struct net_protocol *net_protocol; ++ struct ll_protocol *ll_protocol; ++ struct arp_entry *arp; ++ int merge = 0; ++ ++ /* Identify network-layer and link-layer protocols */ ++ arp_net_protocol = arp_find_protocol ( arphdr->ar_pro ); ++ if ( ! 
arp_net_protocol ) ++ goto done; ++ net_protocol = arp_net_protocol->net_protocol; ++ ll_protocol = netdev->ll_protocol; ++ ++ /* Sanity checks */ ++ if ( ( arphdr->ar_hrd != ll_protocol->ll_proto ) || ++ ( arphdr->ar_hln != ll_protocol->ll_addr_len ) || ++ ( arphdr->ar_pln != net_protocol->net_addr_len ) ) ++ goto done; ++ ++ /* See if we have an entry for this sender, and update it if so */ ++ arp = arp_find_entry ( ll_protocol, net_protocol, ++ arp_sender_pa ( arphdr ) ); ++ if ( arp ) { ++ memcpy ( arp->ll_addr, arp_sender_ha ( arphdr ), ++ arphdr->ar_hln ); ++ merge = 1; ++ DBG ( "ARP cache update: %s %s => %s %s\n", ++ net_protocol->name, net_protocol->ntoa ( arp->net_addr ), ++ ll_protocol->name, ll_protocol->ntoa ( arp->ll_addr ) ); ++ } ++ ++ /* See if we own the target protocol address */ ++ if ( arp_net_protocol->check ( netdev, arp_target_pa ( arphdr ) ) != 0) ++ goto done; ++ ++ /* Create new ARP table entry if necessary */ ++ if ( ! merge ) { ++ arp = &arp_table[next_new_arp_entry++ % NUM_ARP_ENTRIES]; ++ arp->ll_protocol = ll_protocol; ++ arp->net_protocol = net_protocol; ++ memcpy ( arp->ll_addr, arp_sender_ha ( arphdr ), ++ arphdr->ar_hln ); ++ memcpy ( arp->net_addr, arp_sender_pa ( arphdr ), ++ arphdr->ar_pln); ++ DBG ( "ARP cache add: %s %s => %s %s\n", ++ net_protocol->name, net_protocol->ntoa ( arp->net_addr ), ++ ll_protocol->name, ll_protocol->ntoa ( arp->ll_addr ) ); ++ } ++ ++ /* If it's not a request, there's nothing more to do */ ++ if ( arphdr->ar_op != htons ( ARPOP_REQUEST ) ) ++ goto done; ++ ++ /* Change request to a reply */ ++ DBG ( "ARP reply: %s %s => %s %s\n", net_protocol->name, ++ net_protocol->ntoa ( arp_target_pa ( arphdr ) ), ++ ll_protocol->name, ll_protocol->ntoa ( netdev->ll_addr ) ); ++ arphdr->ar_op = htons ( ARPOP_REPLY ); ++ memswap ( arp_sender_ha ( arphdr ), arp_target_ha ( arphdr ), ++ arphdr->ar_hln + arphdr->ar_pln ); ++ memcpy ( arp_sender_ha ( arphdr ), netdev->ll_addr, arphdr->ar_hln ); ++ ++ /* Send reply 
*/ ++ net_tx ( iob_disown ( iobuf ), netdev, &arp_protocol, ++ arp_target_ha ( arphdr ), netdev->ll_addr ); ++ ++ done: ++ free_iob ( iobuf ); ++ return 0; ++} ++ ++/** ++ * Transcribe ARP address ++ * ++ * @v net_addr ARP address ++ * @ret string "" ++ * ++ * This operation is meaningless for the ARP protocol. ++ */ ++static const char * arp_ntoa ( const void *net_addr __unused ) { ++ return ""; ++} ++ ++/** ARP protocol */ ++struct net_protocol arp_protocol __net_protocol = { ++ .name = "ARP", ++ .net_proto = htons ( ETH_P_ARP ), ++ .rx = arp_rx, ++ .ntoa = arp_ntoa, ++}; +diff -urN ipxe-configged/src/net/icmp.c ipxe-droppackets/src/net/icmp.c +--- ipxe-configged/src/net/icmp.c 2010-11-11 14:09:48.132334810 -0500 ++++ ipxe-droppackets/src/net/icmp.c 2010-11-12 10:33:14.482334325 -0500 +@@ -69,11 +69,14 @@ + } + + /* We respond only to pings */ +- if ( icmp->type != ICMP_ECHO_REQUEST ) { ++ /* Always discard, responding to pings is more than most systems do in this state and ++ * gPXE is in a relatively precarious position, resource management wise, and thus it is ++ * easier just to disable this function. */ ++ /* if ( icmp->type != ICMP_ECHO_REQUEST ) { */ + DBG ( "ICMP ignoring type %d\n", icmp->type ); + rc = 0; + goto done; +- } ++ /* } */ + + DBG ( "ICMP responding to ping\n" ); + +diff -urN ipxe-configged/src/net/icmp.c.orig ipxe-droppackets/src/net/icmp.c.orig +--- ipxe-configged/src/net/icmp.c.orig 1969-12-31 19:00:00.000000000 -0500 ++++ ipxe-droppackets/src/net/icmp.c.orig 2010-11-11 14:09:48.132334810 -0500 +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (C) 2009 Michael Brown . ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++FILE_LICENCE ( GPL2_OR_LATER ); ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** @file ++ * ++ * ICMP protocol ++ * ++ */ ++ ++struct tcpip_protocol icmp_protocol __tcpip_protocol; ++ ++/** ++ * Process a received packet ++ * ++ * @v iobuf I/O buffer ++ * @v st_src Partially-filled source address ++ * @v st_dest Partially-filled destination address ++ * @v pshdr_csum Pseudo-header checksum ++ * @ret rc Return status code ++ */ ++static int icmp_rx ( struct io_buffer *iobuf, struct sockaddr_tcpip *st_src, ++ struct sockaddr_tcpip *st_dest, ++ uint16_t pshdr_csum __unused ) { ++ struct icmp_header *icmp = iobuf->data; ++ size_t len = iob_len ( iobuf ); ++ unsigned int csum; ++ int rc; ++ ++ /* Sanity check */ ++ if ( len < sizeof ( *icmp ) ) { ++ DBG ( "ICMP packet too short at %zd bytes (min %zd bytes)\n", ++ len, sizeof ( *icmp ) ); ++ rc = -EINVAL; ++ goto done; ++ } ++ ++ /* Verify checksum */ ++ csum = tcpip_chksum ( icmp, len ); ++ if ( csum != 0 ) { ++ DBG ( "ICMP checksum incorrect (is %04x, should be 0000)\n", ++ csum ); ++ DBG_HD ( icmp, len ); ++ rc = -EINVAL; ++ goto done; ++ } ++ ++ /* We respond only to pings */ ++ if ( icmp->type != ICMP_ECHO_REQUEST ) { ++ DBG ( "ICMP ignoring type %d\n", icmp->type ); ++ rc = 0; ++ goto done; ++ } ++ ++ DBG ( "ICMP responding to ping\n" ); ++ ++ /* Change type to response and recalculate checksum */ ++ icmp->type = ICMP_ECHO_RESPONSE; ++ icmp->chksum = 0; ++ icmp->chksum = tcpip_chksum ( icmp, len ); ++ ++ /* Transmit the 
response */ ++ if ( ( rc = tcpip_tx ( iob_disown ( iobuf ), &icmp_protocol, st_dest, ++ st_src, NULL, NULL ) ) != 0 ) { ++ DBG ( "ICMP could not transmit ping response: %s\n", ++ strerror ( rc ) ); ++ goto done; ++ } ++ ++ done: ++ free_iob ( iobuf ); ++ return rc; ++} ++ ++/** ICMP TCP/IP protocol */ ++struct tcpip_protocol icmp_protocol __tcpip_protocol = { ++ .name = "ICMP", ++ .rx = icmp_rx, ++ .tcpip_proto = IP_ICMP, ++}; +diff -urN ipxe-configged/src/net/tcp.c ipxe-droppackets/src/net/tcp.c +--- ipxe-configged/src/net/tcp.c 2010-11-11 14:09:48.132334810 -0500 ++++ ipxe-droppackets/src/net/tcp.c 2010-11-12 10:33:14.482334325 -0500 +@@ -1119,7 +1119,11 @@ + + /* If no connection was found, send RST */ + if ( ! tcp ) { +- tcp_xmit_reset ( tcp, st_src, tcphdr ); ++ /* We simply drop unrecognized TCP connections. Hopefully, this is no worse than I typically set up a firewall. ++ * If we try to even remotely respond to random TCP streams, it can exhaust gPXE resources easily in a very large ++ * environment */ ++ /* tcp_xmit_reset ( tcp, st_src, tcphdr ); Don't consume precious outbound resource for irrelevant communication. 
*/ ++ + rc = -ENOTCONN; + goto discard; + } +diff -urN ipxe-configged/src/net/tcp.c.orig ipxe-droppackets/src/net/tcp.c.orig +--- ipxe-configged/src/net/tcp.c.orig 1969-12-31 19:00:00.000000000 -0500 ++++ ipxe-droppackets/src/net/tcp.c.orig 2010-11-11 14:09:48.132334810 -0500 +@@ -0,0 +1,1336 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** @file ++ * ++ * TCP protocol ++ * ++ */ ++ ++FILE_LICENCE ( GPL2_OR_LATER ); ++ ++/** A TCP connection */ ++struct tcp_connection { ++ /** Reference counter */ ++ struct refcnt refcnt; ++ /** List of TCP connections */ ++ struct list_head list; ++ ++ /** Flags */ ++ unsigned int flags; ++ ++ /** Data transfer interface */ ++ struct interface xfer; ++ ++ /** Remote socket address */ ++ struct sockaddr_tcpip peer; ++ /** Local port */ ++ unsigned int local_port; ++ ++ /** Current TCP state */ ++ unsigned int tcp_state; ++ /** Previous TCP state ++ * ++ * Maintained only for debug messages ++ */ ++ unsigned int prev_tcp_state; ++ /** Current sequence number ++ * ++ * Equivalent to SND.UNA in RFC 793 terminology. ++ */ ++ uint32_t snd_seq; ++ /** Unacknowledged sequence count ++ * ++ * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology. ++ */ ++ uint32_t snd_sent; ++ /** Send window ++ * ++ * Equivalent to SND.WND in RFC 793 terminology ++ */ ++ uint32_t snd_win; ++ /** Current acknowledgement number ++ * ++ * Equivalent to RCV.NXT in RFC 793 terminology. ++ */ ++ uint32_t rcv_ack; ++ /** Receive window ++ * ++ * Equivalent to RCV.WND in RFC 793 terminology. ++ */ ++ uint32_t rcv_win; ++ /** Most recent received timestamp ++ * ++ * Equivalent to TS.Recent in RFC 1323 terminology. 
++ */ ++ uint32_t ts_recent; ++ ++ /** Transmit queue */ ++ struct list_head tx_queue; ++ /** Receive queue */ ++ struct list_head rx_queue; ++ /** Retransmission timer */ ++ struct retry_timer timer; ++ /** Shutdown (TIME_WAIT) timer */ ++ struct retry_timer wait; ++}; ++ ++/** TCP flags */ ++enum tcp_flags { ++ /** TCP data transfer interface has been closed */ ++ TCP_XFER_CLOSED = 0x0001, ++ /** TCP timestamps are enabled */ ++ TCP_TS_ENABLED = 0x0002, ++ /** TCP acknowledgement is pending */ ++ TCP_ACK_PENDING = 0x0004, ++}; ++ ++/** TCP internal header ++ * ++ * This is the header that replaces the TCP header for packets ++ * enqueued on the receive queue. ++ */ ++struct tcp_rx_queued_header { ++ /** SEQ value, in host-endian order ++ * ++ * This represents the SEQ value at the time the packet is ++ * enqueued, and so excludes the SYN, if present. ++ */ ++ uint32_t seq; ++ /** Flags ++ * ++ * Only FIN is valid within this flags byte; all other flags ++ * have already been processed by the time the packet is ++ * enqueued. 
++ */ ++ uint8_t flags; ++ /** Reserved */ ++ uint8_t reserved[3]; ++}; ++ ++/** ++ * List of registered TCP connections ++ */ ++static LIST_HEAD ( tcp_conns ); ++ ++/* Forward declarations */ ++static struct interface_descriptor tcp_xfer_desc; ++static void tcp_expired ( struct retry_timer *timer, int over ); ++static void tcp_wait_expired ( struct retry_timer *timer, int over ); ++static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, ++ uint32_t win ); ++ ++/** ++ * Name TCP state ++ * ++ * @v state TCP state ++ * @ret name Name of TCP state ++ */ ++static inline __attribute__ (( always_inline )) const char * ++tcp_state ( int state ) { ++ switch ( state ) { ++ case TCP_CLOSED: return "CLOSED"; ++ case TCP_LISTEN: return "LISTEN"; ++ case TCP_SYN_SENT: return "SYN_SENT"; ++ case TCP_SYN_RCVD: return "SYN_RCVD"; ++ case TCP_ESTABLISHED: return "ESTABLISHED"; ++ case TCP_FIN_WAIT_1: return "FIN_WAIT_1"; ++ case TCP_FIN_WAIT_2: return "FIN_WAIT_2"; ++ case TCP_CLOSING_OR_LAST_ACK: return "CLOSING/LAST_ACK"; ++ case TCP_TIME_WAIT: return "TIME_WAIT"; ++ case TCP_CLOSE_WAIT: return "CLOSE_WAIT"; ++ default: return "INVALID"; ++ } ++} ++ ++/** ++ * Dump TCP state transition ++ * ++ * @v tcp TCP connection ++ */ ++static inline __attribute__ (( always_inline )) void ++tcp_dump_state ( struct tcp_connection *tcp ) { ++ ++ if ( tcp->tcp_state != tcp->prev_tcp_state ) { ++ DBGC ( tcp, "TCP %p transitioned from %s to %s\n", tcp, ++ tcp_state ( tcp->prev_tcp_state ), ++ tcp_state ( tcp->tcp_state ) ); ++ } ++ tcp->prev_tcp_state = tcp->tcp_state; ++} ++ ++/** ++ * Dump TCP flags ++ * ++ * @v flags TCP flags ++ */ ++static inline __attribute__ (( always_inline )) void ++tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) { ++ if ( flags & TCP_RST ) ++ DBGC2 ( tcp, " RST" ); ++ if ( flags & TCP_SYN ) ++ DBGC2 ( tcp, " SYN" ); ++ if ( flags & TCP_PSH ) ++ DBGC2 ( tcp, " PSH" ); ++ if ( flags & TCP_FIN ) ++ DBGC2 ( tcp, " FIN" ); ++ if ( flags & TCP_ACK 
) ++ DBGC2 ( tcp, " ACK" ); ++} ++ ++/*************************************************************************** ++ * ++ * Open and close ++ * ++ *************************************************************************** ++ */ ++ ++/** ++ * Bind TCP connection to local port ++ * ++ * @v tcp TCP connection ++ * @v port Local port number ++ * @ret rc Return status code ++ * ++ * If the port is 0, the connection is assigned an available port ++ * between 1024 and 65535. ++ */ ++static int tcp_bind ( struct tcp_connection *tcp, unsigned int port ) { ++ struct tcp_connection *existing; ++ uint16_t try_port; ++ unsigned int i; ++ ++ /* If no port is specified, find an available port */ ++ if ( ! port ) { ++ try_port = random(); ++ for ( i = 0 ; i < 65536 ; i++ ) { ++ try_port++; ++ if ( try_port < 1024 ) ++ continue; ++ if ( tcp_bind ( tcp, try_port ) == 0 ) ++ return 0; ++ } ++ DBGC ( tcp, "TCP %p could not bind: no free ports\n", tcp ); ++ return -EADDRINUSE; ++ } ++ ++ /* Attempt bind to local port */ ++ list_for_each_entry ( existing, &tcp_conns, list ) { ++ if ( existing->local_port == port ) { ++ DBGC ( tcp, "TCP %p could not bind: port %d in use\n", ++ tcp, port ); ++ return -EADDRINUSE; ++ } ++ } ++ tcp->local_port = port; ++ ++ DBGC ( tcp, "TCP %p bound to port %d\n", tcp, port ); ++ return 0; ++} ++ ++/** ++ * Open a TCP connection ++ * ++ * @v xfer Data transfer interface ++ * @v peer Peer socket address ++ * @v local Local socket address, or NULL ++ * @ret rc Return status code ++ */ ++static int tcp_open ( struct interface *xfer, struct sockaddr *peer, ++ struct sockaddr *local ) { ++ struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer; ++ struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local; ++ struct tcp_connection *tcp; ++ unsigned int bind_port; ++ int rc; ++ ++ /* Allocate and initialise structure */ ++ tcp = zalloc ( sizeof ( *tcp ) ); ++ if ( ! 
tcp ) ++ return -ENOMEM; ++ DBGC ( tcp, "TCP %p allocated\n", tcp ); ++ ref_init ( &tcp->refcnt, NULL ); ++ intf_init ( &tcp->xfer, &tcp_xfer_desc, &tcp->refcnt ); ++ timer_init ( &tcp->timer, tcp_expired, &tcp->refcnt ); ++ timer_init ( &tcp->wait, tcp_wait_expired, &tcp->refcnt ); ++ tcp->prev_tcp_state = TCP_CLOSED; ++ tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN ); ++ tcp_dump_state ( tcp ); ++ tcp->snd_seq = random(); ++ INIT_LIST_HEAD ( &tcp->tx_queue ); ++ INIT_LIST_HEAD ( &tcp->rx_queue ); ++ memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) ); ++ ++ /* Bind to local port */ ++ bind_port = ( st_local ? ntohs ( st_local->st_port ) : 0 ); ++ if ( ( rc = tcp_bind ( tcp, bind_port ) ) != 0 ) ++ goto err; ++ ++ /* Start timer to initiate SYN */ ++ start_timer_nodelay ( &tcp->timer ); ++ ++ /* Attach parent interface, transfer reference to connection ++ * list and return ++ */ ++ intf_plug_plug ( &tcp->xfer, xfer ); ++ list_add ( &tcp->list, &tcp_conns ); ++ return 0; ++ ++ err: ++ ref_put ( &tcp->refcnt ); ++ return rc; ++} ++ ++/** ++ * Close TCP connection ++ * ++ * @v tcp TCP connection ++ * @v rc Reason for close ++ * ++ * Closes the data transfer interface. If the TCP state machine is in ++ * a suitable state, the connection will be deleted. ++ */ ++static void tcp_close ( struct tcp_connection *tcp, int rc ) { ++ struct io_buffer *iobuf; ++ struct io_buffer *tmp; ++ ++ /* Close data transfer interface */ ++ intf_shutdown ( &tcp->xfer, rc ); ++ tcp->flags |= TCP_XFER_CLOSED; ++ ++ /* If we are in CLOSED, or have otherwise not yet received a ++ * SYN (i.e. we are in LISTEN or SYN_SENT), just delete the ++ * connection. ++ */ ++ if ( ! 
( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { ++ ++ /* Transition to CLOSED for the sake of debugging messages */ ++ tcp->tcp_state = TCP_CLOSED; ++ tcp_dump_state ( tcp ); ++ ++ /* Free any unprocessed I/O buffers */ ++ list_for_each_entry_safe ( iobuf, tmp, &tcp->rx_queue, list ) { ++ list_del ( &iobuf->list ); ++ free_iob ( iobuf ); ++ } ++ ++ /* Free any unsent I/O buffers */ ++ list_for_each_entry_safe ( iobuf, tmp, &tcp->tx_queue, list ) { ++ list_del ( &iobuf->list ); ++ free_iob ( iobuf ); ++ } ++ ++ /* Remove from list and drop reference */ ++ stop_timer ( &tcp->timer ); ++ list_del ( &tcp->list ); ++ ref_put ( &tcp->refcnt ); ++ DBGC ( tcp, "TCP %p connection deleted\n", tcp ); ++ return; ++ } ++ ++ /* If we have not had our SYN acknowledged (i.e. we are in ++ * SYN_RCVD), pretend that it has been acknowledged so that we ++ * can send a FIN without breaking things. ++ */ ++ if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) ++ tcp_rx_ack ( tcp, ( tcp->snd_seq + 1 ), 0 ); ++ ++ /* If we have no data remaining to send, start sending FIN */ ++ if ( list_empty ( &tcp->tx_queue ) ) { ++ tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); ++ tcp_dump_state ( tcp ); ++ } ++} ++ ++/*************************************************************************** ++ * ++ * Transmit data path ++ * ++ *************************************************************************** ++ */ ++ ++/** ++ * Calculate transmission window ++ * ++ * @v tcp TCP connection ++ * @ret len Maximum length that can be sent in a single packet ++ */ ++static size_t tcp_xmit_win ( struct tcp_connection *tcp ) { ++ size_t len; ++ ++ /* Not ready if we're not in a suitable connection state */ ++ if ( ! 
TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) ++ return 0; ++ ++ /* Length is the minimum of the receiver's window and the path MTU */ ++ len = tcp->snd_win; ++ if ( len > TCP_PATH_MTU ) ++ len = TCP_PATH_MTU; ++ ++ return len; ++} ++ ++/** ++ * Process TCP transmit queue ++ * ++ * @v tcp TCP connection ++ * @v max_len Maximum length to process ++ * @v dest I/O buffer to fill with data, or NULL ++ * @v remove Remove data from queue ++ * @ret len Length of data processed ++ * ++ * This processes at most @c max_len bytes from the TCP connection's ++ * transmit queue. Data will be copied into the @c dest I/O buffer ++ * (if provided) and, if @c remove is true, removed from the transmit ++ * queue. ++ */ ++static size_t tcp_process_tx_queue ( struct tcp_connection *tcp, size_t max_len, ++ struct io_buffer *dest, int remove ) { ++ struct io_buffer *iobuf; ++ struct io_buffer *tmp; ++ size_t frag_len; ++ size_t len = 0; ++ ++ list_for_each_entry_safe ( iobuf, tmp, &tcp->tx_queue, list ) { ++ frag_len = iob_len ( iobuf ); ++ if ( frag_len > max_len ) ++ frag_len = max_len; ++ if ( dest ) { ++ memcpy ( iob_put ( dest, frag_len ), iobuf->data, ++ frag_len ); ++ } ++ if ( remove ) { ++ iob_pull ( iobuf, frag_len ); ++ if ( ! iob_len ( iobuf ) ) { ++ list_del ( &iobuf->list ); ++ free_iob ( iobuf ); ++ } ++ } ++ len += frag_len; ++ max_len -= frag_len; ++ } ++ return len; ++} ++ ++/** ++ * Transmit any outstanding data ++ * ++ * @v tcp TCP connection ++ * ++ * Transmits any outstanding data on the connection. ++ * ++ * Note that even if an error is returned, the retransmission timer ++ * will have been started if necessary, and so the stack will ++ * eventually attempt to retransmit the failed packet. 
++ */ ++static int tcp_xmit ( struct tcp_connection *tcp ) { ++ struct io_buffer *iobuf; ++ struct tcp_header *tcphdr; ++ struct tcp_mss_option *mssopt; ++ struct tcp_timestamp_padded_option *tsopt; ++ void *payload; ++ unsigned int flags; ++ size_t len = 0; ++ uint32_t seq_len; ++ uint32_t app_win; ++ uint32_t max_rcv_win; ++ int rc; ++ ++ /* If retransmission timer is already running, do nothing */ ++ if ( timer_running ( &tcp->timer ) ) ++ return 0; ++ ++ /* Calculate both the actual (payload) and sequence space ++ * lengths that we wish to transmit. ++ */ ++ if ( TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) { ++ len = tcp_process_tx_queue ( tcp, tcp_xmit_win ( tcp ), ++ NULL, 0 ); ++ } ++ seq_len = len; ++ flags = TCP_FLAGS_SENDING ( tcp->tcp_state ); ++ if ( flags & ( TCP_SYN | TCP_FIN ) ) { ++ /* SYN or FIN consume one byte, and we can never send both */ ++ assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) ); ++ seq_len++; ++ } ++ tcp->snd_sent = seq_len; ++ ++ /* If we have nothing to transmit, stop now */ ++ if ( ( seq_len == 0 ) && ! ( tcp->flags & TCP_ACK_PENDING ) ) ++ return 0; ++ ++ /* If we are transmitting anything that requires ++ * acknowledgement (i.e. consumes sequence space), start the ++ * retransmission timer. Do this before attempting to ++ * allocate the I/O buffer, in case allocation itself fails. ++ */ ++ if ( seq_len ) ++ start_timer ( &tcp->timer ); ++ ++ /* Allocate I/O buffer */ ++ iobuf = alloc_iob ( len + MAX_HDR_LEN ); ++ if ( ! 
iobuf ) { ++ DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x " ++ "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ), ++ tcp->rcv_ack ); ++ return -ENOMEM; ++ } ++ iob_reserve ( iobuf, MAX_HDR_LEN ); ++ ++ /* Fill data payload from transmit queue */ ++ tcp_process_tx_queue ( tcp, len, iobuf, 0 ); ++ ++ /* Expand receive window if possible */ ++ max_rcv_win = ( ( freemem * 3 ) / 4 ); ++ if ( max_rcv_win > TCP_MAX_WINDOW_SIZE ) ++ max_rcv_win = TCP_MAX_WINDOW_SIZE; ++ app_win = xfer_window ( &tcp->xfer ); ++ if ( max_rcv_win > app_win ) ++ max_rcv_win = app_win; ++ max_rcv_win &= ~0x03; /* Keep everything dword-aligned */ ++ if ( tcp->rcv_win < max_rcv_win ) ++ tcp->rcv_win = max_rcv_win; ++ ++ /* Fill up the TCP header */ ++ payload = iobuf->data; ++ if ( flags & TCP_SYN ) { ++ mssopt = iob_push ( iobuf, sizeof ( *mssopt ) ); ++ mssopt->kind = TCP_OPTION_MSS; ++ mssopt->length = sizeof ( *mssopt ); ++ mssopt->mss = htons ( TCP_MSS ); ++ } ++ if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) { ++ tsopt = iob_push ( iobuf, sizeof ( *tsopt ) ); ++ memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) ); ++ tsopt->tsopt.kind = TCP_OPTION_TS; ++ tsopt->tsopt.length = sizeof ( tsopt->tsopt ); ++ tsopt->tsopt.tsval = htonl ( currticks() ); ++ tsopt->tsopt.tsecr = htonl ( tcp->ts_recent ); ++ } ++ if ( len != 0 ) ++ flags |= TCP_PSH; ++ tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); ++ memset ( tcphdr, 0, sizeof ( *tcphdr ) ); ++ tcphdr->src = htons ( tcp->local_port ); ++ tcphdr->dest = tcp->peer.st_port; ++ tcphdr->seq = htonl ( tcp->snd_seq ); ++ tcphdr->ack = htonl ( tcp->rcv_ack ); ++ tcphdr->hlen = ( ( payload - iobuf->data ) << 2 ); ++ tcphdr->flags = flags; ++ tcphdr->win = htons ( tcp->rcv_win ); ++ tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); ++ ++ /* Dump header */ ++ DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4zd", ++ tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), ++ ntohl ( tcphdr->seq ), ( ntohl 
( tcphdr->seq ) + seq_len ), ++ ntohl ( tcphdr->ack ), len ); ++ tcp_dump_flags ( tcp, tcphdr->flags ); ++ DBGC2 ( tcp, "\n" ); ++ ++ /* Transmit packet */ ++ if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL, ++ &tcphdr->csum ) ) != 0 ) { ++ DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n", ++ tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), ++ tcp->rcv_ack, strerror ( rc ) ); ++ return rc; ++ } ++ ++ /* Clear ACK-pending flag */ ++ tcp->flags &= ~TCP_ACK_PENDING; ++ ++ return 0; ++} ++ ++/** ++ * Retransmission timer expired ++ * ++ * @v timer Retransmission timer ++ * @v over Failure indicator ++ */ ++static void tcp_expired ( struct retry_timer *timer, int over ) { ++ struct tcp_connection *tcp = ++ container_of ( timer, struct tcp_connection, timer ); ++ ++ DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp, ++ ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ), ++ tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack ); ++ ++ assert ( ( tcp->tcp_state == TCP_SYN_SENT ) || ++ ( tcp->tcp_state == TCP_SYN_RCVD ) || ++ ( tcp->tcp_state == TCP_ESTABLISHED ) || ++ ( tcp->tcp_state == TCP_FIN_WAIT_1 ) || ++ ( tcp->tcp_state == TCP_CLOSE_WAIT ) || ++ ( tcp->tcp_state == TCP_CLOSING_OR_LAST_ACK ) ); ++ ++ if ( over ) { ++ /* If we have finally timed out and given up, ++ * terminate the connection ++ */ ++ tcp->tcp_state = TCP_CLOSED; ++ tcp_dump_state ( tcp ); ++ tcp_close ( tcp, -ETIMEDOUT ); ++ } else { ++ /* Otherwise, retransmit the packet */ ++ tcp_xmit ( tcp ); ++ } ++} ++ ++/** ++ * Shutdown timer expired ++ * ++ * @v timer Shutdown timer ++ * @v over Failure indicator ++ */ ++static void tcp_wait_expired ( struct retry_timer *timer, int over __unused ) { ++ struct tcp_connection *tcp = ++ container_of ( timer, struct tcp_connection, wait ); ++ ++ assert ( tcp->tcp_state == TCP_TIME_WAIT ); ++ ++ DBGC ( tcp, "TCP %p wait complete in %s for %08x..%08x %08x\n", tcp, ++ tcp_state ( tcp->tcp_state ), 
tcp->snd_seq, ++ ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack ); ++ ++ tcp->tcp_state = TCP_CLOSED; ++ tcp_dump_state ( tcp ); ++ tcp_close ( tcp, 0 ); ++} ++ ++/** ++ * Send RST response to incoming packet ++ * ++ * @v in_tcphdr TCP header of incoming packet ++ * @ret rc Return status code ++ */ ++static int tcp_xmit_reset ( struct tcp_connection *tcp, ++ struct sockaddr_tcpip *st_dest, ++ struct tcp_header *in_tcphdr ) { ++ struct io_buffer *iobuf; ++ struct tcp_header *tcphdr; ++ int rc; ++ ++ /* Allocate space for dataless TX buffer */ ++ iobuf = alloc_iob ( MAX_HDR_LEN ); ++ if ( ! iobuf ) { ++ DBGC ( tcp, "TCP %p could not allocate iobuf for RST " ++ "%08x..%08x %08x\n", tcp, ntohl ( in_tcphdr->ack ), ++ ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ) ); ++ return -ENOMEM; ++ } ++ iob_reserve ( iobuf, MAX_HDR_LEN ); ++ ++ /* Construct RST response */ ++ tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); ++ memset ( tcphdr, 0, sizeof ( *tcphdr ) ); ++ tcphdr->src = in_tcphdr->dest; ++ tcphdr->dest = in_tcphdr->src; ++ tcphdr->seq = in_tcphdr->ack; ++ tcphdr->ack = in_tcphdr->seq; ++ tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 ); ++ tcphdr->flags = ( TCP_RST | TCP_ACK ); ++ tcphdr->win = htons ( TCP_MAX_WINDOW_SIZE ); ++ tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); ++ ++ /* Dump header */ ++ DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4d", ++ tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), ++ ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ), ++ ntohl ( tcphdr->ack ), 0 ); ++ tcp_dump_flags ( tcp, tcphdr->flags ); ++ DBGC2 ( tcp, "\n" ); ++ ++ /* Transmit packet */ ++ if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest, ++ NULL, &tcphdr->csum ) ) != 0 ) { ++ DBGC ( tcp, "TCP %p could not transmit RST %08x..%08x %08x: " ++ "%s\n", tcp, ntohl ( in_tcphdr->ack ), ++ ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ), ++ strerror ( rc ) ); ++ return rc; ++ } ++ ++ return 0; ++} ++ 
++/*************************************************************************** ++ * ++ * Receive data path ++ * ++ *************************************************************************** ++ */ ++ ++/** ++ * Identify TCP connection by local port number ++ * ++ * @v local_port Local port ++ * @ret tcp TCP connection, or NULL ++ */ ++static struct tcp_connection * tcp_demux ( unsigned int local_port ) { ++ struct tcp_connection *tcp; ++ ++ list_for_each_entry ( tcp, &tcp_conns, list ) { ++ if ( tcp->local_port == local_port ) ++ return tcp; ++ } ++ return NULL; ++} ++ ++/** ++ * Parse TCP received options ++ * ++ * @v tcp TCP connection ++ * @v data Raw options data ++ * @v len Raw options length ++ * @v options Options structure to fill in ++ */ ++static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data, ++ size_t len, struct tcp_options *options ) { ++ const void *end = ( data + len ); ++ const struct tcp_option *option; ++ unsigned int kind; ++ /* Zero-length options never advance: treat as end of options */ ++ memset ( options, 0, sizeof ( *options ) ); ++ while ( data < end ) { ++ option = data; ++ kind = option->kind; ++ if ( kind == TCP_OPTION_END ) ++ return; ++ if ( kind == TCP_OPTION_NOP ) { ++ data++; ++ continue; ++ } ++ switch ( kind ) { ++ case TCP_OPTION_MSS: ++ options->mssopt = data; ++ break; ++ case TCP_OPTION_TS: ++ options->tsopt = data; ++ break; ++ default: ++ DBGC ( tcp, "TCP %p received unknown option %d\n", ++ tcp, kind ); ++ break; ++ } ++ data += ( option->length ? option->length : ( end - data ) ); ++ } ++} ++ ++/** ++ * Consume received sequence space ++ * ++ * @v tcp TCP connection ++ * @v seq_len Sequence space length to consume ++ */ ++static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) { ++ tcp->rcv_ack += seq_len; ++ if ( tcp->rcv_win > seq_len ) { ++ tcp->rcv_win -= seq_len; ++ } else { ++ tcp->rcv_win = 0; ++ } ++ tcp->flags |= TCP_ACK_PENDING; ++} ++ ++/** ++ * Handle TCP received SYN ++ * ++ * @v tcp TCP connection ++ * @v seq SEQ value (in host-endian order) ++ * @v options TCP options ++ *
@ret rc Return status code ++ */ ++static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq, ++ struct tcp_options *options ) { ++ ++ /* Synchronise sequence numbers on first SYN */ ++ if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { ++ tcp->rcv_ack = seq; ++ if ( options->tsopt ) ++ tcp->flags |= TCP_TS_ENABLED; ++ } ++ ++ /* Ignore duplicate SYN */ ++ if ( seq != tcp->rcv_ack ) ++ return 0; ++ ++ /* Acknowledge SYN */ ++ tcp_rx_seq ( tcp, 1 ); ++ ++ /* Mark SYN as received and start sending ACKs with each packet */ ++ tcp->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) | ++ TCP_STATE_RCVD ( TCP_SYN ) ); ++ ++ return 0; ++} ++ ++/** ++ * Handle TCP received ACK ++ * ++ * @v tcp TCP connection ++ * @v ack ACK value (in host-endian order) ++ * @v win WIN value (in host-endian order) ++ * @ret rc Return status code ++ */ ++static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, ++ uint32_t win ) { ++ uint32_t ack_len = ( ack - tcp->snd_seq ); ++ size_t len; ++ unsigned int acked_flags; ++ ++ /* Check for out-of-range or old duplicate ACKs */ ++ if ( ack_len > tcp->snd_sent ) { ++ DBGC ( tcp, "TCP %p received ACK for %08x..%08x, " ++ "sent only %08x..%08x\n", tcp, tcp->snd_seq, ++ ( tcp->snd_seq + ack_len ), tcp->snd_seq, ++ ( tcp->snd_seq + tcp->snd_sent ) ); ++ ++ if ( TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) { ++ /* Just ignore what might be old duplicate ACKs */ ++ return 0; ++ } else { ++ /* Send RST if an out-of-range ACK is received ++ * on a not-yet-established connection, as per ++ * RFC 793. ++ */ ++ return -EINVAL; ++ } ++ } ++ ++ /* Ignore ACKs that don't actually acknowledge any new data. ++ * (In particular, do not stop the retransmission timer; this ++ * avoids creating a sorceror's apprentice syndrome when a ++ * duplicate ACK is received and we still have data in our ++ * transmit queue.) 
++ */ ++ if ( ack_len == 0 ) ++ return 0; ++ ++ /* Stop the retransmission timer */ ++ stop_timer ( &tcp->timer ); ++ ++ /* Determine acknowledged flags and data length */ ++ len = ack_len; ++ acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) & ++ ( TCP_SYN | TCP_FIN ) ); ++ if ( acked_flags ) ++ len--; ++ ++ /* Update SEQ and sent counters, and window size */ ++ tcp->snd_seq = ack; ++ tcp->snd_sent = 0; ++ tcp->snd_win = win; ++ ++ /* Remove any acknowledged data from transmit queue */ ++ tcp_process_tx_queue ( tcp, len, NULL, 1 ); ++ ++ /* Mark SYN/FIN as acknowledged if applicable. */ ++ if ( acked_flags ) ++ tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags ); ++ ++ /* Start sending FIN if we've had all possible data ACKed */ ++ if ( list_empty ( &tcp->tx_queue ) && ( tcp->flags & TCP_XFER_CLOSED ) ) ++ tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); ++ ++ return 0; ++} ++ ++/** ++ * Handle TCP received data ++ * ++ * @v tcp TCP connection ++ * @v seq SEQ value (in host-endian order) ++ * @v iobuf I/O buffer ++ * @ret rc Return status code ++ * ++ * This function takes ownership of the I/O buffer. 
++ */ ++static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq, ++ struct io_buffer *iobuf ) { ++ uint32_t already_rcvd; ++ uint32_t len; ++ int rc; ++ ++ /* Ignore duplicate or out-of-order data */ ++ already_rcvd = ( tcp->rcv_ack - seq ); ++ len = iob_len ( iobuf ); ++ if ( already_rcvd >= len ) { ++ free_iob ( iobuf ); ++ return 0; ++ } ++ iob_pull ( iobuf, already_rcvd ); ++ len -= already_rcvd; ++ ++ /* Acknowledge new data */ ++ tcp_rx_seq ( tcp, len ); ++ ++ /* Deliver data to application */ ++ if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) { ++ DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n", ++ tcp, seq, ( seq + len ), strerror ( rc ) ); ++ return rc; ++ } ++ ++ return 0; ++} ++ ++/** ++ * Handle TCP received FIN ++ * ++ * @v tcp TCP connection ++ * @v seq SEQ value (in host-endian order) ++ * @ret rc Return status code ++ */ ++static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) { ++ ++ /* Ignore duplicate or out-of-order FIN */ ++ if ( seq != tcp->rcv_ack ) ++ return 0; ++ ++ /* Acknowledge FIN */ ++ tcp_rx_seq ( tcp, 1 ); ++ ++ /* Mark FIN as received */ ++ tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN ); ++ ++ /* Close connection */ ++ tcp_close ( tcp, 0 ); ++ ++ return 0; ++} ++ ++/** ++ * Handle TCP received RST ++ * ++ * @v tcp TCP connection ++ * @v seq SEQ value (in host-endian order) ++ * @ret rc Return status code ++ */ ++static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) { ++ ++ /* Accept RST only if it falls within the window. If we have ++ * not yet received a SYN, then we have no window to test ++ * against, so fall back to checking that our SYN has been ++ * ACKed. ++ */ ++ if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) { ++ if ( ! tcp_in_window ( seq, tcp->rcv_ack, tcp->rcv_win ) ) ++ return 0; ++ } else { ++ if ( ! 
( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) ++ return 0; ++ } ++ ++ /* Abort connection */ ++ tcp->tcp_state = TCP_CLOSED; ++ tcp_dump_state ( tcp ); ++ tcp_close ( tcp, -ECONNRESET ); ++ ++ DBGC ( tcp, "TCP %p connection reset by peer\n", tcp ); ++ return -ECONNRESET; ++} ++ ++/** ++ * Enqueue received TCP packet ++ * ++ * @v tcp TCP connection ++ * @v seq SEQ value (in host-endian order) ++ * @v flags TCP flags ++ * @v iobuf I/O buffer ++ */ ++static void tcp_rx_enqueue ( struct tcp_connection *tcp, uint32_t seq, ++ uint8_t flags, struct io_buffer *iobuf ) { ++ struct tcp_rx_queued_header *tcpqhdr; ++ struct io_buffer *queued; ++ size_t len; ++ uint32_t seq_len; ++ ++ /* Calculate remaining flags and sequence length. Note that ++ * SYN, if present, has already been processed by this point. ++ */ ++ flags &= TCP_FIN; ++ len = iob_len ( iobuf ); ++ seq_len = ( len + ( flags ? 1 : 0 ) ); ++ ++ /* Discard immediately (to save memory) if: ++ * ++ * a) we have not yet received a SYN (and so have no defined ++ * receive window), or ++ * b) the packet lies entirely outside the receive window, or ++ * c) there is no further content to process. ++ */ ++ if ( ( ! 
( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) || ++ ( tcp_cmp ( seq, tcp->rcv_ack + tcp->rcv_win ) >= 0 ) || ++ ( tcp_cmp ( seq + seq_len, tcp->rcv_ack ) < 0 ) || ++ ( seq_len == 0 ) ) { ++ free_iob ( iobuf ); ++ return; ++ } ++ ++ /* Add internal header */ ++ tcpqhdr = iob_push ( iobuf, sizeof ( *tcpqhdr ) ); ++ tcpqhdr->seq = seq; ++ tcpqhdr->flags = flags; ++ ++ /* Add to RX queue */ ++ list_for_each_entry ( queued, &tcp->rx_queue, list ) { ++ tcpqhdr = queued->data; ++ if ( tcp_cmp ( seq, tcpqhdr->seq ) < 0 ) ++ break; ++ } ++ list_add_tail ( &iobuf->list, &queued->list ); ++} ++ ++/** ++ * Process receive queue ++ * ++ * @v tcp TCP connection ++ */ ++static void tcp_process_rx_queue ( struct tcp_connection *tcp ) { ++ struct io_buffer *iobuf; ++ struct tcp_rx_queued_header *tcpqhdr; ++ uint32_t seq; ++ unsigned int flags; ++ size_t len; ++ ++ /* Process all applicable received buffers. Note that we ++ * cannot use list_for_each_entry() to iterate over the RX ++ * queue, since tcp_discard() may remove packets from the RX ++ * queue while we are processing. 
++ */ ++ while ( ( iobuf = list_first_entry ( &tcp->rx_queue, struct io_buffer, ++ list ) ) ) { ++ ++ /* Stop processing when we hit the first gap */ ++ tcpqhdr = iobuf->data; ++ if ( tcp_cmp ( tcpqhdr->seq, tcp->rcv_ack ) > 0 ) ++ break; ++ ++ /* Strip internal header and remove from RX queue */ ++ list_del ( &iobuf->list ); ++ seq = tcpqhdr->seq; ++ flags = tcpqhdr->flags; ++ iob_pull ( iobuf, sizeof ( *tcpqhdr ) ); ++ len = iob_len ( iobuf ); ++ ++ /* Handle new data, if any */ ++ tcp_rx_data ( tcp, seq, iob_disown ( iobuf ) ); ++ seq += len; ++ ++ /* Handle FIN, if present */ ++ if ( flags & TCP_FIN ) { ++ tcp_rx_fin ( tcp, seq ); ++ seq++; ++ } ++ } ++} ++ ++/** ++ * Process received packet ++ * ++ * @v iobuf I/O buffer ++ * @v st_src Partially-filled source address ++ * @v st_dest Partially-filled destination address ++ * @v pshdr_csum Pseudo-header checksum ++ * @ret rc Return status code ++ */ ++static int tcp_rx ( struct io_buffer *iobuf, ++ struct sockaddr_tcpip *st_src, ++ struct sockaddr_tcpip *st_dest __unused, ++ uint16_t pshdr_csum ) { ++ struct tcp_header *tcphdr = iobuf->data; ++ struct tcp_connection *tcp; ++ struct tcp_options options; ++ size_t hlen; ++ uint16_t csum; ++ uint32_t seq; ++ uint32_t ack; ++ uint32_t win; ++ unsigned int flags; ++ size_t len; ++ uint32_t seq_len; ++ int rc; ++ ++ /* Sanity check packet */ ++ if ( iob_len ( iobuf ) < sizeof ( *tcphdr ) ) { ++ DBG ( "TCP packet too short at %zd bytes (min %zd bytes)\n", ++ iob_len ( iobuf ), sizeof ( *tcphdr ) ); ++ rc = -EINVAL; ++ goto discard; ++ } ++ hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4; ++ if ( hlen < sizeof ( *tcphdr ) ) { ++ DBG ( "TCP header too short at %zd bytes (min %zd bytes)\n", ++ hlen, sizeof ( *tcphdr ) ); ++ rc = -EINVAL; ++ goto discard; ++ } ++ if ( hlen > iob_len ( iobuf ) ) { ++ DBG ( "TCP header too long at %zd bytes (max %zd bytes)\n", ++ hlen, iob_len ( iobuf ) ); ++ rc = -EINVAL; ++ goto discard; ++ } ++ csum = tcpip_continue_chksum ( 
pshdr_csum, iobuf->data, ++ iob_len ( iobuf ) ); ++ if ( csum != 0 ) { ++ DBG ( "TCP checksum incorrect (is %04x including checksum " ++ "field, should be 0000)\n", csum ); ++ rc = -EINVAL; ++ goto discard; ++ } ++ ++ /* Parse parameters from header and strip header */ ++ tcp = tcp_demux ( ntohs ( tcphdr->dest ) ); ++ seq = ntohl ( tcphdr->seq ); ++ ack = ntohl ( tcphdr->ack ); ++ win = ntohs ( tcphdr->win ); ++ flags = tcphdr->flags; ++ tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ), ++ ( hlen - sizeof ( *tcphdr ) ), &options ); ++ iob_pull ( iobuf, hlen ); ++ len = iob_len ( iobuf ); ++ seq_len = ( len + ( ( flags & TCP_SYN ) ? 1 : 0 ) + ++ ( ( flags & TCP_FIN ) ? 1 : 0 ) ); ++ ++ /* Dump header */ ++ DBGC2 ( tcp, "TCP %p RX %d<-%d %08x %08x..%08x %4zd", ++ tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ), ++ ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ), ++ ( ntohl ( tcphdr->seq ) + seq_len ), len ); ++ tcp_dump_flags ( tcp, tcphdr->flags ); ++ DBGC2 ( tcp, "\n" ); ++ ++ /* If no connection was found, send RST */ ++ if ( ! 
tcp ) { ++ tcp_xmit_reset ( tcp, st_src, tcphdr ); ++ rc = -ENOTCONN; ++ goto discard; ++ } ++ ++ /* Update timestamp, if applicable */ ++ if ( options.tsopt && tcp_in_window ( tcp->rcv_ack, seq, seq_len ) ) ++ tcp->ts_recent = ntohl ( options.tsopt->tsval ); ++ ++ /* Handle ACK, if present */ ++ if ( flags & TCP_ACK ) { ++ if ( ( rc = tcp_rx_ack ( tcp, ack, win ) ) != 0 ) { ++ tcp_xmit_reset ( tcp, st_src, tcphdr ); ++ goto discard; ++ } ++ } ++ ++ /* Force an ACK if this packet is out of order */ ++ if ( ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) && ++ ( seq != tcp->rcv_ack ) ) { ++ tcp->flags |= TCP_ACK_PENDING; ++ } ++ ++ /* Handle SYN, if present */ ++ if ( flags & TCP_SYN ) { ++ tcp_rx_syn ( tcp, seq, &options ); ++ seq++; ++ } ++ ++ /* Handle RST, if present */ ++ if ( flags & TCP_RST ) { ++ if ( ( rc = tcp_rx_rst ( tcp, seq ) ) != 0 ) ++ goto discard; ++ } ++ ++ /* Enqueue received data */ ++ tcp_rx_enqueue ( tcp, seq, flags, iob_disown ( iobuf ) ); ++ ++ /* Process receive queue */ ++ tcp_process_rx_queue ( tcp ); ++ ++ /* Dump out any state change as a result of the received packet */ ++ tcp_dump_state ( tcp ); ++ ++ /* Send out any pending data */ ++ tcp_xmit ( tcp ); ++ ++ /* If this packet was the last we expect to receive, set up ++ * timer to expire and cause the connection to be freed. 
++ */ ++ if ( TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) { ++ stop_timer ( &tcp->wait ); ++ start_timer_fixed ( &tcp->wait, ( 2 * TCP_MSL ) ); ++ } ++ ++ return 0; ++ ++ discard: ++ /* Free received packet */ ++ free_iob ( iobuf ); ++ return rc; ++} ++ ++/** TCP protocol */ ++struct tcpip_protocol tcp_protocol __tcpip_protocol = { ++ .name = "TCP", ++ .rx = tcp_rx, ++ .tcpip_proto = IP_TCP, ++}; ++ ++/** ++ * Discard some cached TCP data ++ * ++ * @ret discarded Number of cached items discarded ++ */ ++static unsigned int tcp_discard ( void ) { ++ struct tcp_connection *tcp; ++ struct io_buffer *iobuf; ++ unsigned int discarded = 0; ++ ++ /* Try to drop one queued RX packet from each connection */ ++ list_for_each_entry ( tcp, &tcp_conns, list ) { ++ list_for_each_entry_reverse ( iobuf, &tcp->rx_queue, list ) { ++ list_del ( &iobuf->list ); ++ free_iob ( iobuf ); ++ discarded++; ++ break; ++ } ++ } ++ ++ return discarded; ++} ++ ++/** TCP cache discarder */ ++struct cache_discarder tcp_cache_discarder __cache_discarder = { ++ .discard = tcp_discard, ++}; ++ ++/*************************************************************************** ++ * ++ * Data transfer interface ++ * ++ *************************************************************************** ++ */ ++ ++/** ++ * Close interface ++ * ++ * @v tcp TCP connection ++ * @v rc Reason for close ++ */ ++static void tcp_xfer_close ( struct tcp_connection *tcp, int rc ) { ++ ++ /* Close data transfer interface */ ++ tcp_close ( tcp, rc ); ++ ++ /* Transmit FIN, if possible */ ++ tcp_xmit ( tcp ); ++} ++ ++/** ++ * Check flow control window ++ * ++ * @v tcp TCP connection ++ * @ret len Length of window ++ */ ++static size_t tcp_xfer_window ( struct tcp_connection *tcp ) { ++ ++ /* Not ready if data queue is non-empty. This imposes a limit ++ * of only one unACKed packet in the TX queue at any time; we ++ * do this to conserve memory usage. ++ */ ++ if ( ! 
list_empty ( &tcp->tx_queue ) ) ++ return 0; ++ ++ /* Return TCP window length */ ++ return tcp_xmit_win ( tcp ); ++} ++ ++/** ++ * Deliver datagram as I/O buffer ++ * ++ * @v tcp TCP connection ++ * @v iobuf Datagram I/O buffer ++ * @v meta Data transfer metadata ++ * @ret rc Return status code ++ */ ++static int tcp_xfer_deliver ( struct tcp_connection *tcp, ++ struct io_buffer *iobuf, ++ struct xfer_metadata *meta __unused ) { ++ ++ /* Enqueue packet */ ++ list_add_tail ( &iobuf->list, &tcp->tx_queue ); ++ ++ /* Transmit data, if possible */ ++ tcp_xmit ( tcp ); ++ ++ return 0; ++} ++ ++/** TCP data transfer interface operations */ ++static struct interface_operation tcp_xfer_operations[] = { ++ INTF_OP ( xfer_deliver, struct tcp_connection *, tcp_xfer_deliver ), ++ INTF_OP ( xfer_window, struct tcp_connection *, tcp_xfer_window ), ++ INTF_OP ( intf_close, struct tcp_connection *, tcp_xfer_close ), ++}; ++ ++/** TCP data transfer interface descriptor */ ++static struct interface_descriptor tcp_xfer_desc = ++ INTF_DESC ( struct tcp_connection, xfer, tcp_xfer_operations ); ++ ++/*************************************************************************** ++ * ++ * Openers ++ * ++ *************************************************************************** ++ */ ++ ++/** TCP socket opener */ ++struct socket_opener tcp_socket_opener __socket_opener = { ++ .semantics = TCP_SOCK_STREAM, ++ .family = AF_INET, ++ .open = tcp_open, ++}; ++ ++/** Linkage hack */ ++int tcp_sock_stream = TCP_SOCK_STREAM; ++ ++/** ++ * Open TCP URI ++ * ++ * @v xfer Data transfer interface ++ * @v uri URI ++ * @ret rc Return status code ++ */ ++static int tcp_open_uri ( struct interface *xfer, struct uri *uri ) { ++ struct sockaddr_tcpip peer; ++ ++ /* Sanity check */ ++ if ( ! 
uri->host ) ++ return -EINVAL; ++ ++ memset ( &peer, 0, sizeof ( peer ) ); ++ peer.st_port = htons ( uri_port ( uri, 0 ) ); ++ return xfer_open_named_socket ( xfer, SOCK_STREAM, ++ ( struct sockaddr * ) &peer, ++ uri->host, NULL ); ++} ++ ++/** TCP URI opener */ ++struct uri_opener tcp_uri_opener __uri_opener = { ++ .scheme = "tcp", ++ .open = tcp_open_uri, ++}; ++ diff --git a/gpxe/ipxe-expandfilename.patch b/gpxe/ipxe-expandfilename.patch new file mode 100644 index 0000000..58bcc26 --- /dev/null +++ b/gpxe/ipxe-expandfilename.patch @@ -0,0 +1,33 @@ +diff -urN ipxe-undinetchange/src/core/exec.c ipxe-expandfilename/src/core/exec.c +--- ipxe-undinetchange/src/core/exec.c 2010-11-11 14:09:48.062345190 -0500 ++++ ipxe-expandfilename/src/core/exec.c 2010-11-12 10:40:03.943584358 -0500 +@@ -94,7 +94,7 @@ + * The expanded command line is allocated with malloc() and the caller + * must eventually free() it. + */ +-static char * expand_command ( const char *command ) { ++char * expand_command ( const char *command ) { + char *expcmd; + char *start; + char *end; +diff -urN ipxe-undinetchange/src/usr/autoboot.c ipxe-expandfilename/src/usr/autoboot.c +--- ipxe-undinetchange/src/usr/autoboot.c 2010-11-12 10:31:31.972972171 -0500 ++++ ipxe-expandfilename/src/usr/autoboot.c 2010-11-12 10:44:23.972334241 -0500 +@@ -33,6 +33,8 @@ + #include + #include + #include ++char * expand_command ( const char *command ); ++ + + /** @file + * +@@ -97,6 +99,8 @@ + uri_encode ( filename, buf + strlen ( buf ), + sizeof ( buf ) - strlen ( buf ), URI_PATH ); + filename = buf; ++ } else { /* I don't think it could hurt the tftp case, but might as well stay out of a codepath I don't intend to rigorously test */ ++ filename = expand_command(filename); + } + + /* Download and boot image */ diff --git a/gpxe/ipxe-machyp.patch b/gpxe/ipxe-machyp.patch new file mode 100644 index 0000000..a22f335 --- /dev/null +++ b/gpxe/ipxe-machyp.patch @@ -0,0 +1,141 @@ +diff -urN ipxe-cmdlinesize/src/core/settings.c 
ipxe-machyp/src/core/settings.c +--- ipxe-cmdlinesize/src/core/settings.c 2010-11-11 14:09:48.062345190 -0500 ++++ ipxe-machyp/src/core/settings.c 2010-11-12 10:49:22.822334798 -0500 +@@ -1321,6 +1321,72 @@ + } + + /** ++ * Parse and store value of hex string setting, hyphen delimited ++ * ++ * @v settings Settings block ++ * @v setting Setting to store ++ * @v value Formatted setting data ++ * @ret rc Return status code ++ */ ++static int storef_hexhyp ( struct settings *settings, struct setting *setting, ++ const char *value ) { ++ char *ptr = ( char * ) value; ++ uint8_t bytes[ strlen ( value ) + 1 ]; /* cannot exceed strlen(value) + 1 */ ++ unsigned int len = 0; ++ ++ while ( 1 ) { ++ bytes[len++] = strtoul ( ptr, &ptr, 16 ); ++ switch ( *ptr ) { ++ case '\0' : ++ return store_setting ( settings, setting, bytes, len ); ++ case '-' : ++ ptr++; ++ break; ++ default : ++ return -EINVAL; ++ } ++ } ++} ++/** ++ * Fetch and format value of hex string setting with hyphen delimiter ++ * ++ * @v settings Settings block, or NULL to search all blocks ++ * @v setting Setting to fetch ++ * @v buf Buffer to contain formatted value ++ * @v len Length of buffer ++ * @ret len Length of formatted value, or negative error ++ */ ++static int fetchf_hexhyp ( struct settings *settings, struct setting *setting, ++ char *buf, size_t len ) { ++ int raw_len; ++ int check_len; ++ int used = 0; ++ int i; ++ ++ raw_len = fetch_setting_len ( settings, setting ); ++ if ( raw_len < 0 ) ++ return raw_len; ++ ++ { ++ uint8_t raw[raw_len]; ++ ++ check_len = fetch_setting ( settings, setting, raw, ++ sizeof ( raw ) ); ++ if ( check_len < 0 ) ++ return check_len; ++ assert ( check_len == raw_len ); ++ ++ if ( len ) ++ buf[0] = 0; /* Ensure that a terminating NUL exists */ ++ for ( i = 0 ; i < raw_len ; i++ ) { ++ used += ssnprintf ( ( buf + used ), ( len - used ), ++ "%s%02x", ( used ?
"-" : "" ), ++ raw[i] ); ++ } ++ return used; ++ } ++} ++/** + * Fetch and format value of hex string setting + * + * @v settings Settings block, or NULL to search all blocks +@@ -1366,6 +1432,12 @@ + .storef = storef_hex, + .fetchf = fetchf_hex, + }; ++/** A hex-string setting, hyphen delimited */ ++struct setting_type setting_type_hexhyp __setting_type = { ++ .name = "hexhyp", ++ .storef = storef_hexhyp, ++ .fetchf = fetchf_hexhyp, ++}; + + /** + * Parse and store value of UUID setting +diff -urN ipxe-cmdlinesize/src/include/ipxe/settings.h ipxe-machyp/src/include/ipxe/settings.h +--- ipxe-cmdlinesize/src/include/ipxe/settings.h 2010-11-11 14:09:48.132334810 -0500 ++++ ipxe-machyp/src/include/ipxe/settings.h 2010-11-12 10:50:51.433584483 -0500 +@@ -227,6 +227,7 @@ + extern struct setting_type setting_type_uint16 __setting_type; + extern struct setting_type setting_type_uint32 __setting_type; + extern struct setting_type setting_type_hex __setting_type; ++extern struct setting_type setting_type_hexhyp __setting_type; + extern struct setting_type setting_type_uuid __setting_type; + + extern struct setting ip_setting __setting; +@@ -243,6 +244,7 @@ + extern struct setting uuid_setting __setting; + extern struct setting next_server_setting __setting; + extern struct setting mac_setting __setting; ++extern struct setting machyp_setting __setting; + extern struct setting busid_setting __setting; + extern struct setting user_class_setting __setting; + +diff -urN ipxe-cmdlinesize/src/net/netdev_settings.c ipxe-machyp/src/net/netdev_settings.c +--- ipxe-cmdlinesize/src/net/netdev_settings.c 2010-11-11 14:09:48.132334810 -0500 ++++ ipxe-machyp/src/net/netdev_settings.c 2010-11-12 10:49:22.822334798 -0500 +@@ -38,6 +38,11 @@ + .description = "MAC address", + .type = &setting_type_hex, + }; ++struct setting machyp_setting __setting = { ++ .name = "machyp", ++ .description = "MAC address", ++ .type = &setting_type_hexhyp, ++}; + struct setting busid_setting __setting = { + 
.name = "busid", + .description = "Bus ID", +@@ -58,7 +63,7 @@ + struct net_device *netdev = container_of ( settings, struct net_device, + settings.settings ); + +- if ( setting_cmp ( setting, &mac_setting ) == 0 ) { ++ if (( setting_cmp ( setting, &mac_setting ) == 0 ) || ( setting_cmp ( setting, &machyp_setting ) == 0 )) { + if ( len != netdev->ll_protocol->ll_addr_len ) + return -EINVAL; + memcpy ( netdev->ll_addr, data, len ); +@@ -84,7 +89,7 @@ + struct device_description *desc = &netdev->dev->desc; + struct dhcp_netdev_desc dhcp_desc; + +- if ( setting_cmp ( setting, &mac_setting ) == 0 ) { ++ if (( setting_cmp ( setting, &mac_setting ) == 0 ) || ( setting_cmp ( setting, &machyp_setting ) == 0 )) { + if ( len > netdev->ll_protocol->ll_addr_len ) + len = netdev->ll_protocol->ll_addr_len; + memcpy ( data, netdev->ll_addr, len ); diff --git a/gpxe/ipxe-undinetchange.patch b/gpxe/ipxe-undinetchange.patch new file mode 100644 index 0000000..ac629af --- /dev/null +++ b/gpxe/ipxe-undinetchange.patch @@ -0,0 +1,16 @@ +Allegedly some UNDI stacks don't cope well with interrupts. This patch pretty +much turns the world into a polling world. This is relatively hideous, but has a +fairly short lifetime.
+ +diff -urN ipxe-xnbaclass/src/arch/i386/drivers/net/undinet.c ipxe-undinetchange/src/arch/i386/drivers/net/undinet.c +--- ipxe-xnbaclass/src/arch/i386/drivers/net/undinet.c 2010-11-11 14:09:48.042334268 -0500 ++++ ipxe-undinetchange/src/arch/i386/drivers/net/undinet.c 2010-11-12 10:37:01.332334042 -0500 +@@ -258,7 +258,7 @@ + "nop\n\t" + "nop\n\t" + "cli\n\t" ) : : ); +- return; ++ //return; + } + + /* Start ISR processing */ diff --git a/gpxe/ipxe-xnbaclass.patch b/gpxe/ipxe-xnbaclass.patch new file mode 100644 index 0000000..5f6a97b --- /dev/null +++ b/gpxe/ipxe-xnbaclass.patch @@ -0,0 +1,12 @@ +diff -urN ipxe-droppackets/src/net/udp/dhcp.c ipxe-xnbaclass/src/net/udp/dhcp.c +--- ipxe-droppackets/src/net/udp/dhcp.c 2010-11-11 14:09:48.142334824 -0500 ++++ ipxe-xnbaclass/src/net/udp/dhcp.c 2010-11-12 10:35:08.482334675 -0500 +@@ -79,7 +79,7 @@ + DHCP_CLIENT_ARCHITECTURE, DHCP_ARCH_CLIENT_ARCHITECTURE, + DHCP_CLIENT_NDI, DHCP_ARCH_CLIENT_NDI, + DHCP_VENDOR_CLASS_ID, DHCP_ARCH_VENDOR_CLASS_ID, +- DHCP_USER_CLASS_ID, DHCP_STRING ( 'i', 'P', 'X', 'E' ), ++ DHCP_USER_CLASS_ID, DHCP_STRING ( 'x', 'N', 'B', 'A' ), + DHCP_PARAMETER_REQUEST_LIST, + DHCP_OPTION ( DHCP_SUBNET_MASK, DHCP_ROUTERS, DHCP_DNS_SERVERS, + DHCP_LOG_SERVERS, DHCP_HOST_NAME, DHCP_DOMAIN_NAME,