From 3e975ecf3fee92394878b75046f70375ada597a2 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 27 Aug 2013 13:46:12 +0100 Subject: [PATCH 01/18] [settings] Make built-in settings a linker table Allow for configurable provision of built-in settings by placing them in a linker table rather than an array. Signed-off-by: Michael Brown --- src/core/settings.c | 92 ++++++++++++++++++------------------- src/include/ipxe/settings.h | 22 +++++++++ 2 files changed, 68 insertions(+), 46 deletions(-) diff --git a/src/core/settings.c b/src/core/settings.c index 87de0d1b..0fe8b2eb 100644 --- a/src/core/settings.c +++ b/src/core/settings.c @@ -2247,29 +2247,8 @@ struct setting priority_setting __setting ( SETTING_MISC ) = { ****************************************************************************** */ -/** A built-in setting operation */ -struct builtin_setting_operation { - /** Setting */ - struct setting *setting; - /** Fetch setting value - * - * @v data Buffer to fill with setting data - * @v len Length of buffer - * @ret len Length of setting data, or negative error - */ - int ( * fetch ) ( void *data, size_t len ); -}; - /** Built-in setting scope */ -static struct settings_scope builtin_scope; - -/** Error number setting */ -struct setting errno_setting __setting ( SETTING_MISC ) = { - .name = "errno", - .description = "Last error", - .type = &setting_type_uint32, - .scope = &builtin_scope, -}; +struct settings_scope builtin_scope; /** * Fetch error number setting @@ -2289,14 +2268,20 @@ static int errno_fetch ( void *data, size_t len ) { return sizeof ( content ); } -/** Build architecture setting */ -struct setting buildarch_setting __setting ( SETTING_MISC ) = { - .name = "buildarch", - .description = "Build architecture", - .type = &setting_type_string, +/** Error number setting */ +struct setting errno_setting __setting ( SETTING_MISC ) = { + .name = "errno", + .description = "Last error", + .type = &setting_type_uint32, .scope = &builtin_scope, }; +/** 
Error number built-in setting */ +struct builtin_setting errno_builtin_setting __builtin_setting = { + .setting = &errno_setting, + .fetch = errno_fetch, +}; + /** * Fetch build architecture setting * @@ -2311,14 +2296,20 @@ static int buildarch_fetch ( void *data, size_t len ) { return ( sizeof ( buildarch ) - 1 /* NUL */ ); } -/** Platform setting */ -struct setting platform_setting __setting ( SETTING_MISC ) = { - .name = "platform", - .description = "Platform", +/** Build architecture setting */ +struct setting buildarch_setting __setting ( SETTING_MISC ) = { + .name = "buildarch", + .description = "Build architecture", .type = &setting_type_string, .scope = &builtin_scope, }; +/** Build architecture built-in setting */ +struct builtin_setting buildarch_builtin_setting __builtin_setting = { + .setting = &buildarch_setting, + .fetch = buildarch_fetch, +}; + /** * Fetch platform setting * @@ -2333,14 +2324,20 @@ static int platform_fetch ( void *data, size_t len ) { return ( sizeof ( platform ) - 1 /* NUL */ ); } -/** Version setting */ -struct setting version_setting __setting ( SETTING_MISC ) = { - .name = "version", - .description = "Version", +/** Platform setting */ +struct setting platform_setting __setting ( SETTING_MISC ) = { + .name = "platform", + .description = "Platform", .type = &setting_type_string, .scope = &builtin_scope, }; +/** Platform built-in setting */ +struct builtin_setting platform_builtin_setting __builtin_setting = { + .setting = &platform_setting, + .fetch = platform_fetch, +}; + /** * Fetch version setting * @@ -2353,12 +2350,18 @@ static int version_fetch ( void *data, size_t len ) { return ( strlen ( product_version ) ); } -/** List of built-in setting operations */ -static struct builtin_setting_operation builtin_setting_operations[] = { - { &errno_setting, errno_fetch }, - { &buildarch_setting, buildarch_fetch }, - { &platform_setting, platform_fetch }, - { &version_setting, version_fetch }, +/** Version setting */ +struct setting 
version_setting __setting ( SETTING_MISC ) = { + .name = "version", + .description = "Version", + .type = &setting_type_string, + .scope = &builtin_scope, +}; + +/** Version built-in setting */ +struct builtin_setting version_builtin_setting __builtin_setting = { + .setting = &version_setting, + .fetch = version_fetch, }; /** @@ -2373,12 +2376,9 @@ static struct builtin_setting_operation builtin_setting_operations[] = { static int builtin_fetch ( struct settings *settings __unused, struct setting *setting, void *data, size_t len ) { - struct builtin_setting_operation *builtin; - unsigned int i; + struct builtin_setting *builtin; - for ( i = 0 ; i < ( sizeof ( builtin_setting_operations ) / - sizeof ( builtin_setting_operations[0] ) ) ; i++ ) { - builtin = &builtin_setting_operations[i]; + for_each_table_entry ( builtin, BUILTIN_SETTINGS ) { if ( setting_cmp ( setting, builtin->setting ) == 0 ) return builtin->fetch ( data, len ); } diff --git a/src/include/ipxe/settings.h b/src/include/ipxe/settings.h index d1666e1d..4f161072 100644 --- a/src/include/ipxe/settings.h +++ b/src/include/ipxe/settings.h @@ -253,6 +253,28 @@ struct settings_applicator { /** Declare a settings applicator */ #define __settings_applicator __table_entry ( SETTINGS_APPLICATORS, 01 ) +/** A built-in setting */ +struct builtin_setting { + /** Setting */ + struct setting *setting; + /** Fetch setting value + * + * @v data Buffer to fill with setting data + * @v len Length of buffer + * @ret len Length of setting data, or negative error + */ + int ( * fetch ) ( void *data, size_t len ); +}; + +/** Built-in settings table */ +#define BUILTIN_SETTINGS __table ( struct builtin_setting, "builtin_settings" ) + +/** Declare a built-in setting */ +#define __builtin_setting __table_entry ( BUILTIN_SETTINGS, 01 ) + +/** Built-in setting scope */ +extern struct settings_scope builtin_scope; + /** * A generic settings block * From ae0124cd404c0ee25152bef70bee9e2030054ce7 Mon Sep 17 00:00:00 2001 From: 
Michael Brown Date: Tue, 27 Aug 2013 16:28:54 +0100 Subject: [PATCH 02/18] [linux] Give tap devices a name and bus type Give tap devices a meaningful name, and avoid segmentation faults when attempting to retrieve ${net0/bustype} by assigning a new bus type for tap devices. Signed-off-by: Michael Brown --- src/drivers/linux/tap.c | 3 +++ src/include/ipxe/device.h | 3 +++ src/net/netdev_settings.c | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/drivers/linux/tap.c b/src/drivers/linux/tap.c index 53bb16a3..99937d74 100644 --- a/src/drivers/linux/tap.c +++ b/src/drivers/linux/tap.c @@ -216,6 +216,9 @@ static int tap_probe(struct linux_device *device, struct linux_device_request *r } nic->interface = if_setting->value; + snprintf ( device->dev.name, sizeof ( device->dev.name ), "%s", + nic->interface ); + device->dev.desc.bus_type = BUS_TYPE_TAP; if_setting->applied = 1; /* Apply rest of the settings */ diff --git a/src/include/ipxe/device.h b/src/include/ipxe/device.h index 435af6ec..c59697c0 100644 --- a/src/include/ipxe/device.h +++ b/src/include/ipxe/device.h @@ -54,6 +54,9 @@ struct device_description { /** ISA bus type */ #define BUS_TYPE_ISA 5 +/** TAP bus type */ +#define BUS_TYPE_TAP 6 + /** A hardware device */ struct device { /** Name */ diff --git a/src/net/netdev_settings.c b/src/net/netdev_settings.c index 52e8007d..994e0506 100644 --- a/src/net/netdev_settings.c +++ b/src/net/netdev_settings.c @@ -120,6 +120,7 @@ static int netdev_fetch_bustype ( struct net_device *netdev, void *data, [BUS_TYPE_EISA] = "EISA", [BUS_TYPE_MCA] = "MCA", [BUS_TYPE_ISA] = "ISA", + [BUS_TYPE_TAP] = "TAP", }; struct device_description *desc = &netdev->dev->desc; const char *bustype; @@ -127,7 +128,7 @@ static int netdev_fetch_bustype ( struct net_device *netdev, void *data, assert ( desc->bus_type < ( sizeof ( bustypes ) / sizeof ( bustypes[0] ) ) ); bustype = bustypes[desc->bus_type]; - assert ( bustypes != NULL ); + assert ( bustype != NULL ); strncpy 
( data, bustype, len ); return strlen ( bustype ); } From 22a0c4475c91c745f0e5cc1171939359921d09f9 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 27 Aug 2013 16:08:15 +0100 Subject: [PATCH 03/18] [ipv4] Generalise fragment reassembly mechanism Generalise the concept of fragment reassembly to allow for code sharing between IPv4 and IPv6 protocols. Signed-off-by: Michael Brown --- src/include/ipxe/fragment.h | 68 ++++++++++++++ src/include/ipxe/ip.h | 12 --- src/net/fragment.c | 172 ++++++++++++++++++++++++++++++++++++ src/net/ipv4.c | 166 +++++++++------------------------- 4 files changed, 284 insertions(+), 134 deletions(-) create mode 100644 src/include/ipxe/fragment.h create mode 100644 src/net/fragment.c diff --git a/src/include/ipxe/fragment.h b/src/include/ipxe/fragment.h new file mode 100644 index 00000000..6b47439d --- /dev/null +++ b/src/include/ipxe/fragment.h @@ -0,0 +1,68 @@ +#ifndef _IPXE_FRAGMENT_H +#define _IPXE_FRAGMENT_H + +/** @file + * + * Fragment reassembly + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include +#include +#include +#include + +/** Fragment reassembly timeout */ +#define FRAGMENT_TIMEOUT ( TICKS_PER_SEC / 2 ) + +/** A fragment reassembly buffer */ +struct fragment { + /** List of fragment reassembly buffers */ + struct list_head list; + /** Reassembled packet */ + struct io_buffer *iobuf; + /** Length of non-fragmentable portion of reassembled packet */ + size_t hdrlen; + /** Reassembly timer */ + struct retry_timer timer; +}; + +/** A fragment reassembler */ +struct fragment_reassembler { + /** List of fragment reassembly buffers */ + struct list_head list; + /** + * Check if fragment matches fragment reassembly buffer + * + * @v fragment Fragment reassembly buffer + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable portion of I/O buffer + * @ret is_fragment Fragment matches this reassembly buffer + */ + int ( * is_fragment ) ( struct fragment *fragment, + struct io_buffer *iobuf, size_t hdrlen ); + /** + * 
Get fragment offset + * + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable portion of I/O buffer + * @ret offset Offset + */ + size_t ( * fragment_offset ) ( struct io_buffer *iobuf, size_t hdrlen ); + /** + * Check if more fragments exist + * + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable portion of I/O buffer + * @ret more_frags More fragments exist + */ + int ( * more_fragments ) ( struct io_buffer *iobuf, size_t hdrlen ); +}; + +extern struct io_buffer * +fragment_reassemble ( struct fragment_reassembler *fragments, + struct io_buffer *iobuf, size_t *hdrlen ); + +#endif /* _IPXE_FRAGMENT_H */ diff --git a/src/include/ipxe/ip.h b/src/include/ipxe/ip.h index ca508e27..3234b7b0 100644 --- a/src/include/ipxe/ip.h +++ b/src/include/ipxe/ip.h @@ -70,18 +70,6 @@ struct ipv4_miniroute { struct in_addr gateway; }; -/* IPv4 fragment reassembly buffer */ -struct ipv4_fragment { - /* List of fragment reassembly buffers */ - struct list_head list; - /** Reassembled packet */ - struct io_buffer *iobuf; - /** Current offset */ - size_t offset; - /** Reassembly timer */ - struct retry_timer timer; -}; - extern struct list_head ipv4_miniroutes; extern struct net_protocol ipv4_protocol __net_protocol; diff --git a/src/net/fragment.c b/src/net/fragment.c new file mode 100644 index 00000000..3e1dfdf7 --- /dev/null +++ b/src/net/fragment.c @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2013 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include +#include +#include +#include +#include +#include + +/** @file + * + * Fragment reassembly + * + */ + +/** + * Expire fragment reassembly buffer + * + * @v timer Retry timer + * @v fail Failure indicator + */ +static void fragment_expired ( struct retry_timer *timer, int fail __unused ) { + struct fragment *fragment = + container_of ( timer, struct fragment, timer ); + + DBGC ( fragment, "FRAG %p expired\n", fragment ); + free_iob ( fragment->iobuf ); + list_del ( &fragment->list ); + free ( fragment ); +} + +/** + * Find fragment reassembly buffer + * + * @v fragments Fragment reassembler + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable portion of I/O buffer + * @ret fragment Fragment reassembly buffer, or NULL if not found + */ +static struct fragment * fragment_find ( struct fragment_reassembler *fragments, + struct io_buffer *iobuf, + size_t hdrlen ) { + struct fragment *fragment; + + list_for_each_entry ( fragment, &fragments->list, list ) { + if ( fragments->is_fragment ( fragment, iobuf, hdrlen ) ) + return fragment; + } + return NULL; +} + +/** + * Reassemble packet + * + * @v fragments Fragment reassembler + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable portion of I/O buffer + * @ret iobuf Reassembled packet, or NULL + * + * This function takes ownership of the I/O buffer. Note that the + * length of the non-fragmentable portion may be modified. 
+ */ +struct io_buffer * fragment_reassemble ( struct fragment_reassembler *fragments, + struct io_buffer *iobuf, + size_t *hdrlen ) { + struct fragment *fragment; + struct io_buffer *new_iobuf; + size_t new_len; + size_t offset; + size_t expected_offset; + int more_frags; + + /* Find matching fragment reassembly buffer, if any */ + fragment = fragment_find ( fragments, iobuf, *hdrlen ); + + /* Drop out-of-order fragments */ + offset = fragments->fragment_offset ( iobuf, *hdrlen ); + expected_offset = ( fragment ? ( iob_len ( fragment->iobuf ) - + fragment->hdrlen ) : 0 ); + if ( offset != expected_offset ) { + DBGC ( fragment, "FRAG %p dropping out-of-sequence fragment " + "[%zd,%zd), expected [%zd,...)\n", fragment, offset, + ( offset + iob_len ( iobuf ) - *hdrlen ), + expected_offset ); + goto drop; + } + + /* Create or extend fragment reassembly buffer as applicable */ + if ( ! fragment ) { + + /* Create new fragment reassembly buffer */ + fragment = zalloc ( sizeof ( *fragment ) ); + if ( ! fragment ) + goto drop; + list_add ( &fragment->list, &fragments->list ); + fragment->iobuf = iobuf; + fragment->hdrlen = *hdrlen; + timer_init ( &fragment->timer, fragment_expired, NULL ); + DBGC ( fragment, "FRAG %p [0,%zd)\n", fragment, + ( iob_len ( iobuf ) - *hdrlen ) ); + + } else { + + /* Check if this is the final fragment */ + more_frags = fragments->more_fragments ( iobuf, *hdrlen ); + DBGC ( fragment, "FRAG %p [%zd,%zd)%s\n", fragment, + offset, ( offset + iob_len ( iobuf ) - *hdrlen ), + ( more_frags ? "" : " complete" ) ); + + /* Extend fragment reassembly buffer. Preserve I/O + * buffer headroom to allow for code which modifies + * and resends the buffer (e.g. ICMP echo responses). + */ + iob_pull ( iobuf, *hdrlen ); + new_len = ( iob_headroom ( fragment->iobuf ) + + iob_len ( fragment->iobuf ) + iob_len ( iobuf ) ); + new_iobuf = alloc_iob ( new_len ); + if ( ! 
new_iobuf ) { + DBGC ( fragment, "FRAG %p could not extend reassembly " + "buffer to %zd bytes\n", fragment, new_len ); + goto drop; + } + iob_reserve ( new_iobuf, iob_headroom ( fragment->iobuf ) ); + memcpy ( iob_put ( new_iobuf, iob_len ( fragment->iobuf ) ), + fragment->iobuf->data, iob_len ( fragment->iobuf ) ); + memcpy ( iob_put ( new_iobuf, iob_len ( iobuf ) ), + iobuf->data, iob_len ( iobuf ) ); + free_iob ( fragment->iobuf ); + fragment->iobuf = new_iobuf; + free_iob ( iobuf ); + + /* Stop fragment reassembly timer */ + stop_timer ( &fragment->timer ); + + /* If this is the final fragment, return it */ + if ( ! more_frags ) { + iobuf = fragment->iobuf; + *hdrlen = fragment->hdrlen; + list_del ( &fragment->list ); + free ( fragment ); + return iobuf; + } + } + + /* (Re)start fragment reassembly timer */ + start_timer_fixed ( &fragment->timer, FRAGMENT_TIMEOUT ); + + return NULL; + + drop: + free_iob ( iobuf ); + return NULL; +} diff --git a/src/net/ipv4.c b/src/net/ipv4.c index 791d4195..106e8e79 100644 --- a/src/net/ipv4.c +++ b/src/net/ipv4.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include /** @file * @@ -30,12 +30,6 @@ static uint8_t next_ident_high = 0; /** List of IPv4 miniroutes */ struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes ); -/** List of fragment reassembly buffers */ -static LIST_HEAD ( ipv4_fragments ); - -/** Fragment reassembly timeout */ -#define IP_FRAG_TIMEOUT ( TICKS_PER_SEC / 2 ) - /** * Add IPv4 minirouting table entry * @@ -133,131 +127,59 @@ static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) { } /** - * Expire fragment reassembly buffer + * Check if IPv4 fragment matches fragment reassembly buffer * - * @v timer Retry timer - * @v fail Failure indicator + * @v fragment Fragment reassembly buffer + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable portion of I/O buffer + * @ret is_fragment Fragment matches this reassembly buffer */ -static void ipv4_fragment_expired ( 
struct retry_timer *timer, - int fail __unused ) { - struct ipv4_fragment *frag = - container_of ( timer, struct ipv4_fragment, timer ); - struct iphdr *iphdr = frag->iobuf->data; +static int ipv4_is_fragment ( struct fragment *fragment, + struct io_buffer *iobuf, + size_t hdrlen __unused ) { + struct iphdr *frag_iphdr = fragment->iobuf->data; + struct iphdr *iphdr = iobuf->data; - DBGC ( iphdr->src, "IPv4 fragment %04x expired\n", - ntohs ( iphdr->ident ) ); - free_iob ( frag->iobuf ); - list_del ( &frag->list ); - free ( frag ); + return ( ( iphdr->src.s_addr == frag_iphdr->src.s_addr ) && + ( iphdr->ident == frag_iphdr->ident ) ); } /** - * Find matching fragment reassembly buffer - * - * @v iphdr IPv4 header - * @ret frag Fragment reassembly buffer, or NULL - */ -static struct ipv4_fragment * ipv4_fragment ( struct iphdr *iphdr ) { - struct ipv4_fragment *frag; - struct iphdr *frag_iphdr; - - list_for_each_entry ( frag, &ipv4_fragments, list ) { - frag_iphdr = frag->iobuf->data; - - if ( ( iphdr->src.s_addr == frag_iphdr->src.s_addr ) && - ( iphdr->ident == frag_iphdr->ident ) ) { - return frag; - } - } - - return NULL; -} - -/** - * Fragment reassembler + * Get IPv4 fragment offset * * @v iobuf I/O buffer - * @ret iobuf Reassembled packet, or NULL + * @v hdrlen Length of non-fragmentable portion of I/O buffer + * @ret offset Offset */ -static struct io_buffer * ipv4_reassemble ( struct io_buffer *iobuf ) { +static size_t ipv4_fragment_offset ( struct io_buffer *iobuf, + size_t hdrlen __unused ) { struct iphdr *iphdr = iobuf->data; - size_t offset = ( ( ntohs ( iphdr->frags ) & IP_MASK_OFFSET ) << 3 ); - unsigned int more_frags = ( iphdr->frags & htons ( IP_MASK_MOREFRAGS )); - size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 ); - struct ipv4_fragment *frag; - size_t expected_offset; - struct io_buffer *new_iobuf; - /* Find matching fragment reassembly buffer, if any */ - frag = ipv4_fragment ( iphdr ); - - /* Drop out-of-order fragments */ - 
expected_offset = ( frag ? frag->offset : 0 ); - if ( offset != expected_offset ) { - DBGC ( iphdr->src, "IPv4 dropping out-of-sequence fragment " - "%04x (%zd+%zd, expected %zd)\n", - ntohs ( iphdr->ident ), offset, - ( iob_len ( iobuf ) - hdrlen ), expected_offset ); - goto drop; - } - - /* Create or extend fragment reassembly buffer as applicable */ - if ( frag == NULL ) { - - /* Create new fragment reassembly buffer */ - frag = zalloc ( sizeof ( *frag ) ); - if ( ! frag ) - goto drop; - list_add ( &frag->list, &ipv4_fragments ); - frag->iobuf = iobuf; - frag->offset = ( iob_len ( iobuf ) - hdrlen ); - timer_init ( &frag->timer, ipv4_fragment_expired, NULL ); - - } else { - - /* Extend reassembly buffer */ - iob_pull ( iobuf, hdrlen ); - new_iobuf = alloc_iob ( iob_len ( frag->iobuf ) + - iob_len ( iobuf ) ); - if ( ! new_iobuf ) { - DBGC ( iphdr->src, "IPv4 could not extend reassembly " - "buffer to %zd bytes\n", - iob_len ( frag->iobuf ) + iob_len ( iobuf ) ); - goto drop; - } - memcpy ( iob_put ( new_iobuf, iob_len ( frag->iobuf ) ), - frag->iobuf->data, iob_len ( frag->iobuf ) ); - memcpy ( iob_put ( new_iobuf, iob_len ( iobuf ) ), - iobuf->data, iob_len ( iobuf ) ); - free_iob ( frag->iobuf ); - frag->iobuf = new_iobuf; - frag->offset += iob_len ( iobuf ); - free_iob ( iobuf ); - iphdr = frag->iobuf->data; - iphdr->len = ntohs ( iob_len ( frag->iobuf ) ); - - /* Stop fragment reassembly timer */ - stop_timer ( &frag->timer ); - - /* If this is the final fragment, return it */ - if ( ! 
more_frags ) { - iobuf = frag->iobuf; - list_del ( &frag->list ); - free ( frag ); - return iobuf; - } - } - - /* (Re)start fragment reassembly timer */ - start_timer_fixed ( &frag->timer, IP_FRAG_TIMEOUT ); - - return NULL; - - drop: - free_iob ( iobuf ); - return NULL; + return ( ( ntohs ( iphdr->frags ) & IP_MASK_OFFSET ) << 3 ); } +/** + * Check if more fragments exist + * + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable portion of I/O buffer + * @ret more_frags More fragments exist + */ +static int ipv4_more_fragments ( struct io_buffer *iobuf, + size_t hdrlen __unused ) { + struct iphdr *iphdr = iobuf->data; + + return ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ); +} + +/** IPv4 fragment reassembler */ +static struct fragment_reassembler ipv4_reassembler = { + .list = LIST_HEAD_INIT ( ipv4_reassembler.list ), + .is_fragment = ipv4_is_fragment, + .fragment_offset = ipv4_fragment_offset, + .more_fragments = ipv4_more_fragments, +}; + /** * Add IPv4 pseudo-header checksum to existing checksum * @@ -526,14 +448,14 @@ static int ipv4_rx ( struct io_buffer *iobuf, /* Perform fragment reassembly if applicable */ if ( iphdr->frags & htons ( IP_MASK_OFFSET | IP_MASK_MOREFRAGS ) ) { - /* Pass the fragment to ipv4_reassemble() which returns + /* Pass the fragment to fragment_reassemble() which returns * either a fully reassembled I/O buffer or NULL. */ - iobuf = ipv4_reassemble ( iobuf ); + iobuf = fragment_reassemble ( &ipv4_reassembler, iobuf, + &hdrlen ); if ( ! iobuf ) return 0; iphdr = iobuf->data; - hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 ); } /* Construct socket addresses, calculate pseudo-header From c9ed9cb4e7227a5c2a90fe53e1d8282488fd8add Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 2 Sep 2013 22:31:04 +0100 Subject: [PATCH 04/18] [netdevice] Increase maximum network-layer address length IPv6 uses 16-byte network-layer addresses. 
Signed-off-by: Michael Brown --- src/include/ipxe/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/ipxe/netdevice.h b/src/include/ipxe/netdevice.h index 0daa1d62..d0e628a4 100644 --- a/src/include/ipxe/netdevice.h +++ b/src/include/ipxe/netdevice.h @@ -44,7 +44,7 @@ struct device; #define MAX_LL_HEADER_LEN 36 /** Maximum length of a network-layer address */ -#define MAX_NET_ADDR_LEN 4 +#define MAX_NET_ADDR_LEN 16 /** Maximum length of a network-layer header * From d5f69e93887cb9ad505b7ed6394891652f393885 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 29 Aug 2013 14:10:16 +0100 Subject: [PATCH 05/18] [netdevice] Add concept of a network device index IPv6 link-local socket addresses require some way to specify a local network device. We cannot simply use a pointer to the network device, since a struct sockaddr_in6 may be long-lived and has no way to hold a reference to the network device. Using a network device index allows a socket address to cleanly refer to a network device without worrying about whether or not that device continues to exist. 
Signed-off-by: Michael Brown --- src/include/ipxe/netdevice.h | 2 ++ src/net/netdevice.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/include/ipxe/netdevice.h b/src/include/ipxe/netdevice.h index d0e628a4..7288ad86 100644 --- a/src/include/ipxe/netdevice.h +++ b/src/include/ipxe/netdevice.h @@ -300,6 +300,8 @@ struct net_device { struct list_head list; /** List of open network devices */ struct list_head open_list; + /** Index of this network device */ + unsigned int index; /** Name of this network device */ char name[12]; /** Underlying hardware device */ diff --git a/src/net/netdevice.c b/src/net/netdevice.c index 5af9c6dc..283632f6 100644 --- a/src/net/netdevice.c +++ b/src/net/netdevice.c @@ -498,10 +498,11 @@ int register_netdev ( struct net_device *netdev ) { uint32_t seed; int rc; - /* Create device name */ + /* Record device index and create device name */ + netdev->index = ifindex++; if ( netdev->name[0] == '\0' ) { snprintf ( netdev->name, sizeof ( netdev->name ), "net%d", - ifindex++ ); + netdev->index ); } /* Set initial link-layer address, if not already set */ From 0b65c8cad6ed1b03ce1870856b3496c7be1f8bf1 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 30 Aug 2013 19:05:03 +0100 Subject: [PATCH 06/18] [netdevice] Add method for generating EUI-64 address from link-layer address Signed-off-by: Michael Brown --- src/drivers/net/ipoib.c | 1 + src/include/ipxe/ethernet.h | 1 + src/include/ipxe/netdevice.h | 9 +++++++++ src/net/80211/net80211.c | 1 + src/net/ethernet.c | 16 ++++++++++++++++ 5 files changed, 28 insertions(+) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index c1b8cad9..1b539177 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -265,6 +265,7 @@ struct ll_protocol ipoib_protocol __ll_protocol = { .ntoa = eth_ntoa, .mc_hash = eth_mc_hash, .eth_addr = eth_eth_addr, + .eui64 = eth_eui64, .flags = LL_NAME_ONLY, }; diff --git a/src/include/ipxe/ethernet.h 
b/src/include/ipxe/ethernet.h index 1794ff67..5ffc45b7 100644 --- a/src/include/ipxe/ethernet.h +++ b/src/include/ipxe/ethernet.h @@ -91,6 +91,7 @@ extern const char * eth_ntoa ( const void *ll_addr ); extern int eth_mc_hash ( unsigned int af, const void *net_addr, void *ll_addr ); extern int eth_eth_addr ( const void *ll_addr, void *eth_addr ); +extern int eth_eui64 ( const void *ll_addr, void *eui64 ); extern struct net_device * alloc_etherdev ( size_t priv_size ); #endif /* _IPXE_ETHERNET_H */ diff --git a/src/include/ipxe/netdevice.h b/src/include/ipxe/netdevice.h index 7288ad86..7f819d9a 100644 --- a/src/include/ipxe/netdevice.h +++ b/src/include/ipxe/netdevice.h @@ -175,8 +175,17 @@ struct ll_protocol { * * @v ll_addr Link-layer address * @v eth_addr Ethernet-compatible address to fill in + * @ret rc Return status code */ int ( * eth_addr ) ( const void *ll_addr, void *eth_addr ); + /** + * Generate EUI-64 address + * + * @v ll_addr Link-layer address + * @v eui64 EUI-64 address to fill in + * @ret rc Return status code + */ + int ( * eui64 ) ( const void *ll_addr, void *eui64 ); /** Link-layer protocol * * This is an ARPHRD_XXX constant, in network byte order. 
diff --git a/src/net/80211/net80211.c b/src/net/80211/net80211.c index 54df7905..3893f652 100644 --- a/src/net/80211/net80211.c +++ b/src/net/80211/net80211.c @@ -599,6 +599,7 @@ static struct ll_protocol net80211_ll_protocol __ll_protocol = { .ntoa = eth_ntoa, .mc_hash = eth_mc_hash, .eth_addr = eth_eth_addr, + .eui64 = eth_eui64, .ll_proto = htons ( ARPHRD_ETHER ), /* "encapsulated Ethernet" */ .hw_addr_len = ETH_ALEN, .ll_addr_len = ETH_ALEN, diff --git a/src/net/ethernet.c b/src/net/ethernet.c index 4fd2ab6e..013b2d76 100644 --- a/src/net/ethernet.c +++ b/src/net/ethernet.c @@ -165,6 +165,21 @@ int eth_eth_addr ( const void *ll_addr, void *eth_addr ) { return 0; } +/** + * Generate EUI-64 address + * + * @v ll_addr Link-layer address + * @v eui64 EUI-64 address to fill in + * @ret rc Return status code + */ +int eth_eui64 ( const void *ll_addr, void *eui64 ) { + + memcpy ( ( eui64 + 0 ), ( ll_addr + 0 ), 3 ); + memcpy ( ( eui64 + 5 ), ( ll_addr + 3 ), 3 ); + *( ( uint16_t * ) ( eui64 + 3 ) ) = htons ( 0xfffe ); + return 0; +} + /** Ethernet protocol */ struct ll_protocol ethernet_protocol __ll_protocol = { .name = "Ethernet", @@ -178,6 +193,7 @@ struct ll_protocol ethernet_protocol __ll_protocol = { .ntoa = eth_ntoa, .mc_hash = eth_mc_hash, .eth_addr = eth_eth_addr, + .eui64 = eth_eui64, }; /** From 0f787df28446b5d9d3f139f414cfb38761c9ad09 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 31 Aug 2013 03:23:24 +0100 Subject: [PATCH 07/18] [ethernet] Add support for generating multicast hash for IPv6 addresses Signed-off-by: Michael Brown --- src/net/ethernet.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/net/ethernet.c b/src/net/ethernet.c index 013b2d76..a2e56589 100644 --- a/src/net/ethernet.c +++ b/src/net/ethernet.c @@ -149,6 +149,11 @@ int eth_mc_hash ( unsigned int af, const void *net_addr, void *ll_addr ) { ll_addr_bytes[4] = net_addr_bytes[2]; ll_addr_bytes[5] = net_addr_bytes[3]; return 0; + case AF_INET6: + ll_addr_bytes[0] = 0x33; 
+ ll_addr_bytes[1] = 0x33; + memcpy ( &ll_addr_bytes[2], &net_addr_bytes[12], 4 ); + return 0; default: return -ENOTSUP; } From 8a2dc7a58807bd3106cb4aed83623ed39f9b328f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 1 Sep 2013 17:38:40 +0100 Subject: [PATCH 08/18] [linux] Apply MAC address prior to registering network device Signed-off-by: Michael Brown --- src/drivers/linux/tap.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/drivers/linux/tap.c b/src/drivers/linux/tap.c index 99937d74..97943665 100644 --- a/src/drivers/linux/tap.c +++ b/src/drivers/linux/tap.c @@ -200,11 +200,6 @@ static int tap_probe(struct linux_device *device, struct linux_device_request *r netdev->dev = &device->dev; memset(nic, 0, sizeof(*nic)); - if ((rc = register_netdev(netdev)) != 0) - goto err_register; - - netdev_link_up(netdev); - /* Look for the mandatory if setting */ if_setting = linux_find_setting("if", &request->settings); @@ -224,6 +219,12 @@ static int tap_probe(struct linux_device *device, struct linux_device_request *r /* Apply rest of the settings */ linux_apply_settings(&request->settings, &netdev->settings.settings); + /* Register network device */ + if ((rc = register_netdev(netdev)) != 0) + goto err_register; + + netdev_link_up(netdev); + return 0; err_settings: From 6bf36f57a0f7a22ffa85ae4995933077df62e309 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 1 Sep 2013 20:55:18 +0100 Subject: [PATCH 09/18] [tcpip] Pass through network device to transport layer protocols NDP requires knowledge of the network device on which a packet was received. 
Signed-off-by: Michael Brown --- src/include/ipxe/tcpip.h | 8 +++++--- src/net/icmp.c | 5 ++++- src/net/icmpv6.c | 2 +- src/net/ipv4.c | 2 +- src/net/ipv6.c | 7 ++++--- src/net/tcp.c | 2 ++ src/net/tcpip.c | 8 +++++--- src/net/udp.c | 5 ++++- 8 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/include/ipxe/tcpip.h b/src/include/ipxe/tcpip.h index 0cc688a9..b2c559e2 100644 --- a/src/include/ipxe/tcpip.h +++ b/src/include/ipxe/tcpip.h @@ -69,6 +69,7 @@ struct tcpip_protocol { * Process received packet * * @v iobuf I/O buffer + * @v netdev Network device * @v st_src Partially-filled source address * @v st_dest Partially-filled destination address * @v pshdr_csum Pseudo-header checksum @@ -76,7 +77,8 @@ struct tcpip_protocol { * * This method takes ownership of the I/O buffer. */ - int ( * rx ) ( struct io_buffer *iobuf, struct sockaddr_tcpip *st_src, + int ( * rx ) ( struct io_buffer *iobuf, struct net_device *netdev, + struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum ); /** * Transport-layer protocol number @@ -128,8 +130,8 @@ struct tcpip_net_protocol { /** Declare a TCP/IP network-layer protocol */ #define __tcpip_net_protocol __table_entry ( TCPIP_NET_PROTOCOLS, 01 ) -extern int tcpip_rx ( struct io_buffer *iobuf, uint8_t tcpip_proto, - struct sockaddr_tcpip *st_src, +extern int tcpip_rx ( struct io_buffer *iobuf, struct net_device *netdev, + uint8_t tcpip_proto, struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum ); extern int tcpip_tx ( struct io_buffer *iobuf, struct tcpip_protocol *tcpip, struct sockaddr_tcpip *st_src, diff --git a/src/net/icmp.c b/src/net/icmp.c index 830d8292..6142b748 100644 --- a/src/net/icmp.c +++ b/src/net/icmp.c @@ -38,12 +38,15 @@ struct tcpip_protocol icmp_protocol __tcpip_protocol; * Process a received packet * * @v iobuf I/O buffer + * @v netdev Network device * @v st_src Partially-filled source address * @v st_dest Partially-filled destination address 
* @v pshdr_csum Pseudo-header checksum * @ret rc Return status code */ -static int icmp_rx ( struct io_buffer *iobuf, struct sockaddr_tcpip *st_src, +static int icmp_rx ( struct io_buffer *iobuf, + struct net_device *netdev __unused, + struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum __unused ) { struct icmp_header *icmp = iobuf->data; diff --git a/src/net/icmpv6.c b/src/net/icmpv6.c index 262ffc3f..72423806 100644 --- a/src/net/icmpv6.c +++ b/src/net/icmpv6.c @@ -69,7 +69,7 @@ int icmp6_send_solicit ( struct net_device *netdev, struct in6_addr *src __unuse * @v st_src Source address * @v st_dest Destination address */ -static int icmp6_rx ( struct io_buffer *iobuf, struct sockaddr_tcpip *st_src, +static int icmp6_rx ( struct io_buffer *iobuf, struct net_device *netdev __unused, struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest, __unused uint16_t pshdr_csum ) { struct icmp6_header *icmp6hdr = iobuf->data; diff --git a/src/net/ipv4.c b/src/net/ipv4.c index 106e8e79..bd318806 100644 --- a/src/net/ipv4.c +++ b/src/net/ipv4.c @@ -469,7 +469,7 @@ static int ipv4_rx ( struct io_buffer *iobuf, dest.sin.sin_addr = iphdr->dest; pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM ); iob_pull ( iobuf, hdrlen ); - if ( ( rc = tcpip_rx ( iobuf, iphdr->protocol, &src.st, + if ( ( rc = tcpip_rx ( iobuf, netdev, iphdr->protocol, &src.st, &dest.st, pshdr_csum ) ) != 0 ) { DBGC ( src.sin.sin_addr, "IPv4 received packet rejected by " "stack: %s\n", strerror ( rc ) ); diff --git a/src/net/ipv6.c b/src/net/ipv6.c index d76e59cb..077118df 100644 --- a/src/net/ipv6.c +++ b/src/net/ipv6.c @@ -260,7 +260,8 @@ static int ipv6_tx ( struct io_buffer *iobuf, * * Refer http://www.iana.org/assignments/ipv6-parameters for the numbers */ -static int ipv6_process_nxt_hdr ( struct io_buffer *iobuf, uint8_t nxt_hdr, +static int ipv6_process_nxt_hdr ( struct io_buffer *iobuf, + struct net_device *netdev, uint8_t nxt_hdr, struct sockaddr_tcpip *src, 
struct sockaddr_tcpip *dest ) { switch ( nxt_hdr ) { case IP6_HOPBYHOP: @@ -278,7 +279,7 @@ static int ipv6_process_nxt_hdr ( struct io_buffer *iobuf, uint8_t nxt_hdr, return 0; } /* Next header is not a IPv6 extension header */ - return tcpip_rx ( iobuf, nxt_hdr, src, dest, 0 /* fixme */ ); + return tcpip_rx ( iobuf, netdev, nxt_hdr, src, dest, 0 /* fixme */ ); } /** @@ -344,7 +345,7 @@ static int ipv6_rx ( struct io_buffer *iobuf, iob_pull ( iobuf, sizeof ( *ip6hdr ) ); /* Send it to the transport layer */ - return ipv6_process_nxt_hdr ( iobuf, ip6hdr->nxt_hdr, &src.st, &dest.st ); + return ipv6_process_nxt_hdr ( iobuf, netdev, ip6hdr->nxt_hdr, &src.st, &dest.st ); drop: DBG ( "Packet dropped\n" ); diff --git a/src/net/tcp.c b/src/net/tcp.c index 0e18c831..1e1968a0 100644 --- a/src/net/tcp.c +++ b/src/net/tcp.c @@ -1115,12 +1115,14 @@ static void tcp_process_rx_queue ( struct tcp_connection *tcp ) { * Process received packet * * @v iobuf I/O buffer + * @v netdev Network device * @v st_src Partially-filled source address * @v st_dest Partially-filled destination address * @v pshdr_csum Pseudo-header checksum * @ret rc Return status code */ static int tcp_rx ( struct io_buffer *iobuf, + struct net_device *netdev __unused, struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest __unused, uint16_t pshdr_csum ) { diff --git a/src/net/tcpip.c b/src/net/tcpip.c index 721a4e48..0e467144 100644 --- a/src/net/tcpip.c +++ b/src/net/tcpip.c @@ -20,6 +20,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); /** Process a received TCP/IP packet * * @v iobuf I/O buffer + * @v netdev Network device * @v tcpip_proto Transport-layer protocol number * @v st_src Partially-filled source address * @v st_dest Partially-filled destination address @@ -32,8 +33,8 @@ FILE_LICENCE ( GPL2_OR_LATER ); * address family and the network-layer addresses, but leave the ports * and the rest of the structures as zero). 
*/ -int tcpip_rx ( struct io_buffer *iobuf, uint8_t tcpip_proto, - struct sockaddr_tcpip *st_src, +int tcpip_rx ( struct io_buffer *iobuf, struct net_device *netdev, + uint8_t tcpip_proto, struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum ) { struct tcpip_protocol *tcpip; @@ -42,7 +43,8 @@ int tcpip_rx ( struct io_buffer *iobuf, uint8_t tcpip_proto, for_each_table_entry ( tcpip, TCPIP_PROTOCOLS ) { if ( tcpip->tcpip_proto == tcpip_proto ) { DBG ( "TCP/IP received %s packet\n", tcpip->name ); - return tcpip->rx ( iobuf, st_src, st_dest, pshdr_csum ); + return tcpip->rx ( iobuf, netdev, st_src, st_dest, + pshdr_csum ); } } diff --git a/src/net/udp.c b/src/net/udp.c index edc7488a..596f242a 100644 --- a/src/net/udp.c +++ b/src/net/udp.c @@ -247,12 +247,15 @@ static struct udp_connection * udp_demux ( struct sockaddr_tcpip *local ) { * Process a received packet * * @v iobuf I/O buffer + * @v netdev Network device * @v st_src Partially-filled source address * @v st_dest Partially-filled destination address * @v pshdr_csum Pseudo-header checksum * @ret rc Return status code */ -static int udp_rx ( struct io_buffer *iobuf, struct sockaddr_tcpip *st_src, +static int udp_rx ( struct io_buffer *iobuf, + struct net_device *netdev __unused, + struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum ) { struct udp_header *udphdr = iobuf->data; struct udp_connection *udp; From c6a04085d25721a29af75beeb7c9c2e08a610cf2 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 1 Sep 2013 16:13:58 +0100 Subject: [PATCH 10/18] [neighbour] Generalise concept of neighbour discovery Split the protocol-independent portions of arp.c into a separate file neighbour.c, to allow for sharing of functionality between IPv4+ARP and IPv6+NDP. 
Signed-off-by: Michael Brown --- src/include/ipxe/arp.h | 24 +- src/include/ipxe/errfile.h | 1 + src/include/ipxe/neighbour.h | 44 ++++ src/net/arp.c | 388 +++------------------------- src/net/neighbour.c | 479 +++++++++++++++++++++++++++++++++++ 5 files changed, 579 insertions(+), 357 deletions(-) create mode 100644 src/include/ipxe/neighbour.h create mode 100644 src/net/neighbour.c diff --git a/src/include/ipxe/arp.h b/src/include/ipxe/arp.h index 00396d82..e30ae6b7 100644 --- a/src/include/ipxe/arp.h +++ b/src/include/ipxe/arp.h @@ -11,6 +11,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); #include #include +#include /** A network-layer protocol that relies upon ARP */ struct arp_net_protocol { @@ -34,9 +35,26 @@ struct arp_net_protocol { #define __arp_net_protocol __table_entry ( ARP_NET_PROTOCOLS, 01 ) extern struct net_protocol arp_protocol __net_protocol; +extern struct neighbour_discovery arp_discovery; -extern int arp_tx ( struct io_buffer *iobuf, struct net_device *netdev, - struct net_protocol *net_protocol, const void *net_dest, - const void *net_source, const void *ll_source ); +/** + * Transmit packet, determining link-layer address via ARP + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @v net_source Source network-layer address + * @v ll_source Source link-layer address + * @ret rc Return status code + */ +static inline int arp_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest, const void *net_source, + const void *ll_source ) { + + return neighbour_tx ( iobuf, netdev, net_protocol, net_dest, + &arp_discovery, net_source, ll_source ); +} #endif /* _IPXE_ARP_H */ diff --git a/src/include/ipxe/errfile.h b/src/include/ipxe/errfile.h index 801d579b..83675796 100644 --- a/src/include/ipxe/errfile.h +++ b/src/include/ipxe/errfile.h @@ -214,6 +214,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); #define 
ERRFILE_nfs_open ( ERRFILE_NET | 0x00340000 ) #define ERRFILE_mount ( ERRFILE_NET | 0x00350000 ) #define ERRFILE_oncrpc_iob ( ERRFILE_NET | 0x00360000 ) +#define ERRFILE_neighbour ( ERRFILE_NET | 0x00370000 ) #define ERRFILE_image ( ERRFILE_IMAGE | 0x00000000 ) #define ERRFILE_elf ( ERRFILE_IMAGE | 0x00010000 ) diff --git a/src/include/ipxe/neighbour.h b/src/include/ipxe/neighbour.h new file mode 100644 index 00000000..5720e8b0 --- /dev/null +++ b/src/include/ipxe/neighbour.h @@ -0,0 +1,44 @@ +#ifndef _IPXE_NEIGHBOUR_H +#define _IPXE_NEIGHBOUR_H + +/** @file + * + * Neighbour discovery + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include + +/** A neighbour discovery protocol */ +struct neighbour_discovery { + /** Name */ + const char *name; + /** + * Transmit neighbour discovery request + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @v net_source Source network-layer address + * @ret rc Return status code + */ + int ( * tx_request ) ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest, const void *net_source ); +}; + +extern int neighbour_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest, + struct neighbour_discovery *discovery, + const void *net_source, const void *ll_source ); +extern int neighbour_update ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest, const void *ll_dest ); +extern int neighbour_define ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest, const void *ll_dest ); + +#endif /* _IPXE_NEIGHBOUR_H */ diff --git a/src/net/arp.c b/src/net/arp.c index b94eb906..261e681e 100644 --- a/src/net/arp.c +++ b/src/net/arp.c @@ -28,11 +28,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); #include #include #include -#include -#include -#include -#include -#include +#include #include /** @file @@ -45,291 +41,30 @@ 
FILE_LICENCE ( GPL2_OR_LATER ); * */ -/** ARP minimum timeout */ -#define ARP_MIN_TIMEOUT ( TICKS_PER_SEC / 8 ) - -/** ARP maximum timeout */ -#define ARP_MAX_TIMEOUT ( TICKS_PER_SEC * 3 ) - -/** An ARP cache entry */ -struct arp_entry { - /** Reference count */ - struct refcnt refcnt; - /** List of ARP cache entries */ - struct list_head list; - /** Network device */ - struct net_device *netdev; - /** Network-layer protocol */ - struct net_protocol *net_protocol; - /** Network-layer destination address */ - uint8_t net_dest[MAX_NET_ADDR_LEN]; - /** Network-layer source address */ - uint8_t net_source[MAX_NET_ADDR_LEN]; - /** Link-layer destination address */ - uint8_t ll_dest[MAX_LL_ADDR_LEN]; - /** Retransmission timer */ - struct retry_timer timer; - /** Pending I/O buffers */ - struct list_head tx_queue; -}; - -/** The ARP cache */ -static LIST_HEAD ( arp_entries ); - struct net_protocol arp_protocol __net_protocol; -static void arp_expired ( struct retry_timer *timer, int over ); - /** - * Free ARP cache entry - * - * @v refcnt Reference count - */ -static void arp_free ( struct refcnt *refcnt ) { - struct arp_entry *arp = - container_of ( refcnt, struct arp_entry, refcnt ); - - /* Sanity check */ - assert ( list_empty ( &arp->tx_queue ) ); - - /* Drop reference to network device */ - netdev_put ( arp->netdev ); - - /* Free entry */ - free ( arp ); -} - -/** - * Create ARP cache entry + * Transmit ARP request * * @v netdev Network device * @v net_protocol Network-layer protocol * @v net_dest Destination network-layer address * @v net_source Source network-layer address - * @ret arp ARP cache entry, or NULL if allocation failed - */ -static struct arp_entry * arp_create ( struct net_device *netdev, - struct net_protocol *net_protocol, - const void *net_dest, - const void *net_source ) { - struct arp_entry *arp; - - /* Allocate and initialise entry */ - arp = zalloc ( sizeof ( *arp ) ); - if ( ! 
arp ) - return NULL; - ref_init ( &arp->refcnt, arp_free ); - arp->netdev = netdev_get ( netdev ); - arp->net_protocol = net_protocol; - memcpy ( arp->net_dest, net_dest, - net_protocol->net_addr_len ); - memcpy ( arp->net_source, net_source, - net_protocol->net_addr_len ); - timer_init ( &arp->timer, arp_expired, &arp->refcnt ); - arp->timer.min_timeout = ARP_MIN_TIMEOUT; - arp->timer.max_timeout = ARP_MAX_TIMEOUT; - INIT_LIST_HEAD ( &arp->tx_queue ); - - /* Start timer running to trigger initial transmission */ - start_timer_nodelay ( &arp->timer ); - - /* Transfer ownership to cache */ - list_add ( &arp->list, &arp_entries ); - - DBGC ( arp, "ARP %p %s %s %s created\n", arp, netdev->name, - net_protocol->name, net_protocol->ntoa ( net_dest ) ); - return arp; -} - -/** - * Find entry in the ARP cache - * - * @v netdev Network device - * @v net_protocol Network-layer protocol - * @v net_dest Destination network-layer address - * @ret arp ARP cache entry, or NULL if not found - */ -static struct arp_entry * arp_find ( struct net_device *netdev, - struct net_protocol *net_protocol, - const void *net_dest ) { - struct arp_entry *arp; - - list_for_each_entry ( arp, &arp_entries, list ) { - if ( ( arp->netdev == netdev ) && - ( arp->net_protocol == net_protocol ) && - ( memcmp ( arp->net_dest, net_dest, - net_protocol->net_addr_len ) == 0 ) ) { - - /* Move to start of cache */ - list_del ( &arp->list ); - list_add ( &arp->list, &arp_entries ); - - return arp; - } - } - return NULL; -} - -/** - * Destroy ARP cache entry - * - * @v arp ARP cache entry - * @v rc Reason for destruction - */ -static void arp_destroy ( struct arp_entry *arp, int rc ) { - struct net_device *netdev = arp->netdev; - struct net_protocol *net_protocol = arp->net_protocol; - struct io_buffer *iobuf; - - /* Take ownership from cache */ - list_del ( &arp->list ); - - /* Stop timer */ - stop_timer ( &arp->timer ); - - /* Discard any outstanding I/O buffers */ - while ( ( iobuf = list_first_entry ( 
&arp->tx_queue, struct io_buffer, - list ) ) != NULL ) { - DBGC2 ( arp, "ARP %p %s %s %s discarding deferred packet: " - "%s\n", arp, netdev->name, net_protocol->name, - net_protocol->ntoa ( arp->net_dest ), strerror ( rc ) ); - list_del ( &iobuf->list ); - netdev_tx_err ( arp->netdev, iobuf, rc ); - } - - DBGC ( arp, "ARP %p %s %s %s destroyed: %s\n", arp, netdev->name, - net_protocol->name, net_protocol->ntoa ( arp->net_dest ), - strerror ( rc ) ); - - /* Drop remaining reference */ - ref_put ( &arp->refcnt ); -} - -/** - * Test if ARP cache entry has a valid link-layer address - * - * @v arp ARP cache entry - * @ret resolved ARP cache entry is resolved - */ -static inline int arp_resolved ( struct arp_entry *arp ) { - return ( ! timer_running ( &arp->timer ) ); -} - -/** - * Transmit packet, determining link-layer address via ARP - * - * @v iobuf I/O buffer - * @v netdev Network device - * @v net_protocol Network-layer protocol - * @v net_dest Destination network-layer address - * @v net_source Source network-layer address - * @v ll_source Source link-layer address * @ret rc Return status code */ -int arp_tx ( struct io_buffer *iobuf, struct net_device *netdev, - struct net_protocol *net_protocol, const void *net_dest, - const void *net_source, const void *ll_source ) { - struct arp_entry *arp; - - /* Find or create ARP cache entry */ - arp = arp_find ( netdev, net_protocol, net_dest ); - if ( ! arp ) { - arp = arp_create ( netdev, net_protocol, net_dest, - net_source ); - if ( ! arp ) - return -ENOMEM; - } - - /* If a link-layer address is available then transmit - * immediately, otherwise queue for later transmission. 
- */ - if ( arp_resolved ( arp ) ) { - return net_tx ( iobuf, netdev, net_protocol, arp->ll_dest, - ll_source ); - } else { - DBGC2 ( arp, "ARP %p %s %s %s deferring packet\n", - arp, netdev->name, net_protocol->name, - net_protocol->ntoa ( net_dest ) ); - list_add_tail ( &iobuf->list, &arp->tx_queue ); - return -EAGAIN; - } -} - -/** - * Update ARP cache entry - * - * @v arp ARP cache entry - * @v ll_dest Destination link-layer address - */ -static void arp_update ( struct arp_entry *arp, const void *ll_dest ) { - struct net_device *netdev = arp->netdev; +static int arp_tx_request ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest, const void *net_source ) { struct ll_protocol *ll_protocol = netdev->ll_protocol; - struct net_protocol *net_protocol = arp->net_protocol; - struct io_buffer *iobuf; - int rc; - - DBGC ( arp, "ARP %p %s %s %s updated => %s\n", arp, netdev->name, - net_protocol->name, net_protocol->ntoa ( arp->net_dest ), - ll_protocol->ntoa ( ll_dest ) ); - - /* Fill in link-layer address */ - memcpy ( arp->ll_dest, ll_dest, ll_protocol->ll_addr_len ); - - /* Stop retransmission timer */ - stop_timer ( &arp->timer ); - - /* Transmit any packets in queue. Take out a temporary - * reference on the entry to prevent it from going out of - * scope during the call to net_tx(). 
- */ - ref_get ( &arp->refcnt ); - while ( ( iobuf = list_first_entry ( &arp->tx_queue, struct io_buffer, - list ) ) != NULL ) { - DBGC2 ( arp, "ARP %p %s %s %s transmitting deferred packet\n", - arp, netdev->name, net_protocol->name, - net_protocol->ntoa ( arp->net_dest ) ); - list_del ( &iobuf->list ); - if ( ( rc = net_tx ( iobuf, netdev, net_protocol, ll_dest, - netdev->ll_addr ) ) != 0 ) { - DBGC ( arp, "ARP %p could not transmit deferred " - "packet: %s\n", arp, strerror ( rc ) ); - /* Ignore error and continue */ - } - } - ref_put ( &arp->refcnt ); -} - -/** - * Handle ARP timer expiry - * - * @v timer Retry timer - * @v fail Failure indicator - */ -static void arp_expired ( struct retry_timer *timer, int fail ) { - struct arp_entry *arp = container_of ( timer, struct arp_entry, timer ); - struct net_device *netdev = arp->netdev; - struct ll_protocol *ll_protocol = netdev->ll_protocol; - struct net_protocol *net_protocol = arp->net_protocol; struct io_buffer *iobuf; struct arphdr *arphdr; int rc; - /* If we have failed, destroy the cache entry */ - if ( fail ) { - arp_destroy ( arp, -ETIMEDOUT ); - return; - } - - /* Restart the timer */ - start_timer ( &arp->timer ); - /* Allocate ARP packet */ iobuf = alloc_iob ( MAX_LL_HEADER_LEN + sizeof ( *arphdr ) + ( 2 * ( MAX_LL_ADDR_LEN + MAX_NET_ADDR_LEN ) ) ); - if ( ! iobuf ) { - /* Leave timer running and try again later */ - return; - } + if ( ! 
iobuf ) + return -ENOMEM; iob_reserve ( iobuf, MAX_LL_HEADER_LEN ); /* Build up ARP request */ @@ -342,21 +77,30 @@ static void arp_expired ( struct retry_timer *timer, int fail ) { memcpy ( iob_put ( iobuf, ll_protocol->ll_addr_len ), netdev->ll_addr, ll_protocol->ll_addr_len ); memcpy ( iob_put ( iobuf, net_protocol->net_addr_len ), - arp->net_source, net_protocol->net_addr_len ); + net_source, net_protocol->net_addr_len ); memset ( iob_put ( iobuf, ll_protocol->ll_addr_len ), 0, ll_protocol->ll_addr_len ); memcpy ( iob_put ( iobuf, net_protocol->net_addr_len ), - arp->net_dest, net_protocol->net_addr_len ); + net_dest, net_protocol->net_addr_len ); /* Transmit ARP request */ if ( ( rc = net_tx ( iobuf, netdev, &arp_protocol, netdev->ll_broadcast, netdev->ll_addr ) ) != 0 ) { - DBGC ( arp, "ARP %p could not transmit request: %s\n", - arp, strerror ( rc ) ); - return; + DBGC ( netdev, "ARP %s %s %s could not transmit request: %s\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( net_dest ), strerror ( rc ) ); + return rc; } + + return 0; } +/** ARP neighbour discovery protocol */ +struct neighbour_discovery arp_discovery = { + .name = "ARP", + .tx_request = arp_tx_request, +}; + /** * Identify ARP protocol * @@ -368,9 +112,8 @@ static struct arp_net_protocol * arp_find_protocol ( uint16_t net_proto ) { struct arp_net_protocol *arp_net_protocol; for_each_table_entry ( arp_net_protocol, ARP_NET_PROTOCOLS ) { - if ( arp_net_protocol->net_protocol->net_proto == net_proto ) { + if ( arp_net_protocol->net_protocol->net_proto == net_proto ) return arp_net_protocol; - } } return NULL; } @@ -392,7 +135,6 @@ static int arp_rx ( struct io_buffer *iobuf, struct net_device *netdev, struct arp_net_protocol *arp_net_protocol; struct net_protocol *net_protocol; struct ll_protocol *ll_protocol; - struct arp_entry *arp; int rc; /* Identify network-layer and link-layer protocols */ @@ -412,11 +154,9 @@ static int arp_rx ( struct io_buffer *iobuf, struct net_device 
*netdev, goto done; } - /* See if we have an entry for this sender, and update it if so */ - arp = arp_find ( netdev, net_protocol, arp_sender_pa ( arphdr ) ); - if ( arp ) { - arp_update ( arp, arp_sender_ha ( arphdr ) ); - } + /* Update neighbour cache entry for this sender, if any */ + neighbour_update ( netdev, net_protocol, arp_sender_pa ( arphdr ), + arp_sender_ha ( arphdr ) ); /* If it's not a request, there's nothing more to do */ if ( arphdr->ar_op != htons ( ARPOP_REQUEST ) ) { @@ -431,10 +171,10 @@ static int arp_rx ( struct io_buffer *iobuf, struct net_device *netdev, } /* Change request to a reply */ - DBGC ( netdev, "ARP reply %s %s %s => %s %s\n", - netdev->name, net_protocol->name, - net_protocol->ntoa ( arp_target_pa ( arphdr ) ), - ll_protocol->name, ll_protocol->ntoa ( netdev->ll_addr ) ); + DBGC2 ( netdev, "ARP %s %s %s reply => %s %s\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( arp_target_pa ( arphdr ) ), + ll_protocol->name, ll_protocol->ntoa ( netdev->ll_addr ) ); arphdr->ar_op = htons ( ARPOP_REPLY ); memswap ( arp_sender_ha ( arphdr ), arp_target_ha ( arphdr ), arphdr->ar_hln + arphdr->ar_pln ); @@ -444,8 +184,10 @@ static int arp_rx ( struct io_buffer *iobuf, struct net_device *netdev, if ( ( rc = net_tx ( iob_disown ( iobuf ), netdev, &arp_protocol, arp_target_ha ( arphdr ), netdev->ll_addr ) ) != 0 ) { - DBGC ( netdev, "ARP could not transmit reply via %s: %s\n", - netdev->name, strerror ( rc ) ); + DBGC ( netdev, "ARP %s %s %s could not transmit reply: %s\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( arp_target_pa ( arphdr ) ), + strerror ( rc ) ); goto done; } @@ -469,72 +211,10 @@ static const char * arp_ntoa ( const void *net_addr __unused ) { return ""; } -/** ARP protocol */ +/** ARP network protocol */ struct net_protocol arp_protocol __net_protocol = { .name = "ARP", .net_proto = htons ( ETH_P_ARP ), .rx = arp_rx, .ntoa = arp_ntoa, }; - -/** - * Update ARP cache on network device creation - * - 
* @v netdev Network device - */ -static int arp_probe ( struct net_device *netdev __unused ) { - /* Nothing to do */ - return 0; -} - -/** - * Update ARP cache on network device state change or removal - * - * @v netdev Network device - */ -static void arp_flush ( struct net_device *netdev ) { - struct arp_entry *arp; - struct arp_entry *tmp; - - /* Remove all ARP cache entries when a network device is closed */ - if ( ! netdev_is_open ( netdev ) ) { - list_for_each_entry_safe ( arp, tmp, &arp_entries, list ) - arp_destroy ( arp, -ENODEV ); - } -} - -/** ARP driver (for net device notifications) */ -struct net_driver arp_net_driver __net_driver = { - .name = "ARP", - .probe = arp_probe, - .notify = arp_flush, - .remove = arp_flush, -}; - -/** - * Discard some cached ARP entries - * - * @ret discarded Number of cached items discarded - */ -static unsigned int arp_discard ( void ) { - struct arp_entry *arp; - - /* Drop oldest cache entry, if any */ - arp = list_last_entry ( &arp_entries, struct arp_entry, list ); - if ( arp ) { - arp_destroy ( arp, -ENOBUFS ); - return 1; - } else { - return 0; - } -} - -/** ARP cache discarder - * - * ARP cache entries are deemed to have a high replacement cost, since - * flushing an active ARP cache entry midway through a TCP transfer - * will cause substantial disruption. - */ -struct cache_discarder arp_discarder __cache_discarder ( CACHE_EXPENSIVE ) = { - .discard = arp_discard, -}; diff --git a/src/net/neighbour.c b/src/net/neighbour.c new file mode 100644 index 00000000..210b7138 --- /dev/null +++ b/src/net/neighbour.c @@ -0,0 +1,479 @@ +/* + * Copyright (C) 2013 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** @file + * + * Neighbour discovery + * + * This file implements the abstract functions of neighbour discovery, + * independent of the underlying network protocol (e.g. ARP or NDP). + * + */ + +/** A neighbour cache entry */ +struct neighbour { + /** Reference count */ + struct refcnt refcnt; + /** List of neighbour cache entries */ + struct list_head list; + + /** Network device */ + struct net_device *netdev; + /** Network-layer protocol */ + struct net_protocol *net_protocol; + /** Network-layer destination address */ + uint8_t net_dest[MAX_NET_ADDR_LEN]; + /** Link-layer destination address */ + uint8_t ll_dest[MAX_LL_ADDR_LEN]; + + /** Neighbour discovery protocol (if any) */ + struct neighbour_discovery *discovery; + /** Network-layer source address (if any) */ + uint8_t net_source[MAX_NET_ADDR_LEN]; + /** Retransmission timer */ + struct retry_timer timer; + + /** Pending I/O buffers */ + struct list_head tx_queue; +}; + +/** Neighbour discovery minimum timeout */ +#define NEIGHBOUR_MIN_TIMEOUT ( TICKS_PER_SEC / 8 ) + +/** Neighbour discovery maximum timeout */ +#define NEIGHBOUR_MAX_TIMEOUT ( TICKS_PER_SEC * 3 ) + +/** The neighbour cache */ +static LIST_HEAD ( neighbours ); + +static void neighbour_expired ( struct retry_timer *timer, int over ); + +/** + * Free neighbour cache entry + * + * @v refcnt Reference count + */ +static 
void neighbour_free ( struct refcnt *refcnt ) { + struct neighbour *neighbour = + container_of ( refcnt, struct neighbour, refcnt ); + + /* Sanity check */ + assert ( list_empty ( &neighbour->tx_queue ) ); + + /* Drop reference to network device */ + netdev_put ( neighbour->netdev ); + + /* Free neighbour */ + free ( neighbour ); +} + +/** + * Test if neighbour cache entry has a valid link-layer address + * + * @v neighbour Neighbour cache entry + * @ret has_ll_dest Neighbour cache entry has a valid link-layer address + */ +static inline __attribute__ (( always_inline )) int +neighbour_has_ll_dest ( struct neighbour *neighbour ) { + return ( ! timer_running ( &neighbour->timer ) ); +} + +/** + * Create neighbour cache entry + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @ret neighbour Neighbour cache entry, or NULL if allocation failed + */ +static struct neighbour * neighbour_create ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest ) { + struct neighbour *neighbour; + + /* Allocate and initialise entry */ + neighbour = zalloc ( sizeof ( *neighbour ) ); + if ( ! 
neighbour ) + return NULL; + ref_init ( &neighbour->refcnt, neighbour_free ); + neighbour->netdev = netdev_get ( netdev ); + neighbour->net_protocol = net_protocol; + memcpy ( neighbour->net_dest, net_dest, + net_protocol->net_addr_len ); + timer_init ( &neighbour->timer, neighbour_expired, &neighbour->refcnt ); + neighbour->timer.min_timeout = NEIGHBOUR_MIN_TIMEOUT; + neighbour->timer.max_timeout = NEIGHBOUR_MAX_TIMEOUT; + INIT_LIST_HEAD ( &neighbour->tx_queue ); + + /* Transfer ownership to cache */ + list_add ( &neighbour->list, &neighbours ); + + DBGC ( neighbour, "NEIGHBOUR %s %s %s created\n", netdev->name, + net_protocol->name, net_protocol->ntoa ( net_dest ) ); + return neighbour; +} + +/** + * Find neighbour cache entry + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @ret neighbour Neighbour cache entry, or NULL if not found + */ +static struct neighbour * neighbour_find ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest ) { + struct neighbour *neighbour; + + list_for_each_entry ( neighbour, &neighbours, list ) { + if ( ( neighbour->netdev == netdev ) && + ( neighbour->net_protocol == net_protocol ) && + ( memcmp ( neighbour->net_dest, net_dest, + net_protocol->net_addr_len ) == 0 ) ) { + + /* Move to start of cache */ + list_del ( &neighbour->list ); + list_add ( &neighbour->list, &neighbours ); + + return neighbour; + } + } + return NULL; +} + +/** + * Start neighbour discovery + * + * @v neighbour Neighbour cache entry + * @v discovery Neighbour discovery protocol + * @v net_source Source network-layer address + */ +static void neighbour_discover ( struct neighbour *neighbour, + struct neighbour_discovery *discovery, + const void *net_source ) { + struct net_device *netdev = neighbour->netdev; + struct net_protocol *net_protocol = neighbour->net_protocol; + + /* Record discovery protocol and source network-layer address */ + 
neighbour->discovery = discovery; + memcpy ( neighbour->net_source, net_source, + net_protocol->net_addr_len ); + + /* Start timer to trigger neighbour discovery */ + start_timer_nodelay ( &neighbour->timer ); + + DBGC ( neighbour, "NEIGHBOUR %s %s %s discovering via %s\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( neighbour->net_dest ), + neighbour->discovery->name ); +} + +/** + * Complete neighbour discovery + * + * @v neighbour Neighbour cache entry + * @v ll_dest Destination link-layer address + */ +static void neighbour_discovered ( struct neighbour *neighbour, + const void *ll_dest ) { + struct net_device *netdev = neighbour->netdev; + struct ll_protocol *ll_protocol = netdev->ll_protocol; + struct net_protocol *net_protocol = neighbour->net_protocol; + struct io_buffer *iobuf; + int rc; + + /* Fill in link-layer address */ + memcpy ( neighbour->ll_dest, ll_dest, ll_protocol->ll_addr_len ); + DBGC ( neighbour, "NEIGHBOUR %s %s %s is %s %s\n", netdev->name, + net_protocol->name, net_protocol->ntoa ( neighbour->net_dest ), + ll_protocol->name, ll_protocol->ntoa ( neighbour->ll_dest ) ); + + /* Stop retransmission timer */ + stop_timer ( &neighbour->timer ); + + /* Transmit any packets in queue. Take out a temporary + * reference on the entry to prevent it from going out of + * scope during the call to net_tx(). 
+ */ + ref_get ( &neighbour->refcnt ); + while ( ( iobuf = list_first_entry ( &neighbour->tx_queue, + struct io_buffer, list )) != NULL){ + DBGC2 ( neighbour, "NEIGHBOUR %s %s %s transmitting deferred " + "packet\n", netdev->name, net_protocol->name, + net_protocol->ntoa ( neighbour->net_dest ) ); + list_del ( &iobuf->list ); + if ( ( rc = net_tx ( iobuf, netdev, net_protocol, ll_dest, + netdev->ll_addr ) ) != 0 ) { + DBGC ( neighbour, "NEIGHBOUR %s %s %s could not " + "transmit deferred packet: %s\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( neighbour->net_dest ), + strerror ( rc ) ); + /* Ignore error and continue */ + } + } + ref_put ( &neighbour->refcnt ); +} + +/** + * Destroy neighbour cache entry + * + * @v neighbour Neighbour cache entry + * @v rc Reason for destruction + */ +static void neighbour_destroy ( struct neighbour *neighbour, int rc ) { + struct net_device *netdev = neighbour->netdev; + struct net_protocol *net_protocol = neighbour->net_protocol; + struct io_buffer *iobuf; + + /* Take ownership from cache */ + list_del ( &neighbour->list ); + + /* Stop timer */ + stop_timer ( &neighbour->timer ); + + /* Discard any outstanding I/O buffers */ + while ( ( iobuf = list_first_entry ( &neighbour->tx_queue, + struct io_buffer, list )) != NULL){ + DBGC2 ( neighbour, "NEIGHBOUR %s %s %s discarding deferred " + "packet: %s\n", netdev->name, net_protocol->name, + net_protocol->ntoa ( neighbour->net_dest ), + strerror ( rc ) ); + list_del ( &iobuf->list ); + netdev_tx_err ( neighbour->netdev, iobuf, rc ); + } + + DBGC ( neighbour, "NEIGHBOUR %s %s %s destroyed: %s\n", netdev->name, + net_protocol->name, net_protocol->ntoa ( neighbour->net_dest ), + strerror ( rc ) ); + + /* Drop remaining reference */ + ref_put ( &neighbour->refcnt ); +} + +/** + * Handle neighbour timer expiry + * + * @v timer Retry timer + * @v fail Failure indicator + */ +static void neighbour_expired ( struct retry_timer *timer, int fail ) { + struct neighbour 
*neighbour = + container_of ( timer, struct neighbour, timer ); + struct net_device *netdev = neighbour->netdev; + struct net_protocol *net_protocol = neighbour->net_protocol; + struct neighbour_discovery *discovery = + neighbour->discovery; + const void *net_dest = neighbour->net_dest; + const void *net_source = neighbour->net_source; + int rc; + + /* If we have failed, destroy the cache entry */ + if ( fail ) { + neighbour_destroy ( neighbour, -ETIMEDOUT ); + return; + } + + /* Restart the timer */ + start_timer ( &neighbour->timer ); + + /* Transmit neighbour request */ + if ( ( rc = discovery->tx_request ( netdev, net_protocol, net_dest, + net_source ) ) != 0 ) { + DBGC ( neighbour, "NEIGHBOUR %s %s %s could not transmit %s " + "request: %s\n", netdev->name, net_protocol->name, + net_protocol->ntoa ( neighbour->net_dest ), + neighbour->discovery->name, strerror ( rc ) ); + /* Retransmit when timer expires */ + return; + } +} + +/** + * Transmit packet, determining link-layer address via neighbour discovery + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v discovery Neighbour discovery protocol + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @v net_source Source network-layer address + * @v ll_source Source link-layer address + * @ret rc Return status code + */ +int neighbour_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, const void *net_dest, + struct neighbour_discovery *discovery, + const void *net_source, const void *ll_source ) { + struct neighbour *neighbour; + + /* Find or create neighbour cache entry */ + neighbour = neighbour_find ( netdev, net_protocol, net_dest ); + if ( ! neighbour ) { + neighbour = neighbour_create ( netdev, net_protocol, net_dest ); + if ( ! 
neighbour ) + return -ENOMEM; + neighbour_discover ( neighbour, discovery, net_source ); + } + + /* If a link-layer address is available then transmit + * immediately, otherwise queue for later transmission. + */ + if ( neighbour_has_ll_dest ( neighbour ) ) { + return net_tx ( iobuf, netdev, net_protocol, neighbour->ll_dest, + ll_source ); + } else { + DBGC2 ( neighbour, "NEIGHBOUR %s %s %s deferring packet\n", + netdev->name, net_protocol->name, + net_protocol->ntoa ( net_dest ) ); + list_add_tail ( &iobuf->list, &neighbour->tx_queue ); + return -EAGAIN; + } +} + +/** + * Update existing neighbour cache entry + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @v ll_dest Destination link-layer address + * @ret rc Return status code + */ +int neighbour_update ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest, const void *ll_dest ) { + struct neighbour *neighbour; + + /* Find neighbour cache entry */ + neighbour = neighbour_find ( netdev, net_protocol, net_dest ); + if ( ! neighbour ) + return -ENOENT; + + /* Set destination address */ + neighbour_discovered ( neighbour, ll_dest ); + + return 0; +} + +/** + * Define neighbour cache entry + * + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v net_dest Destination network-layer address + * @v ll_dest Destination link-layer address, if known + * @ret rc Return status code + */ +int neighbour_define ( struct net_device *netdev, + struct net_protocol *net_protocol, + const void *net_dest, const void *ll_dest ) { + struct neighbour *neighbour; + + /* Find or create neighbour cache entry */ + neighbour = neighbour_find ( netdev, net_protocol, net_dest ); + if ( ! neighbour ) { + neighbour = neighbour_create ( netdev, net_protocol, net_dest ); + if ( ! 
neighbour ) + return -ENOMEM; + } + + /* Set destination address */ + neighbour_discovered ( neighbour, ll_dest ); + + return 0; +} + +/** + * Update neighbour cache on network device creation + * + * @v netdev Network device + */ +static int neighbour_probe ( struct net_device *netdev __unused ) { + /* Nothing to do */ + return 0; +} + +/** + * Update neighbour cache on network device state change or removal + * + * @v netdev Network device + */ +static void neighbour_flush ( struct net_device *netdev ) { + struct neighbour *neighbour; + struct neighbour *tmp; + + /* Remove all neighbour cache entries when a network device is closed */ + if ( ! netdev_is_open ( netdev ) ) { + list_for_each_entry_safe ( neighbour, tmp, &neighbours, list ) + neighbour_destroy ( neighbour, -ENODEV ); + } +} + +/** Neighbour driver (for net device notifications) */ +struct net_driver neighbour_net_driver __net_driver = { + .name = "Neighbour", + .probe = neighbour_probe, + .notify = neighbour_flush, + .remove = neighbour_flush, +}; + +/** + * Discard some cached neighbour entries + * + * @ret discarded Number of cached items discarded + */ +static unsigned int neighbour_discard ( void ) { + struct neighbour *neighbour; + + /* Drop oldest cache entry, if any */ + neighbour = list_last_entry ( &neighbours, struct neighbour, list ); + if ( neighbour ) { + neighbour_destroy ( neighbour, -ENOBUFS ); + return 1; + } else { + return 0; + } +} + +/** + * Neighbour cache discarder + * + * Neighbour cache entries are deemed to have a high replacement cost, + * since flushing an active neighbour cache entry midway through a TCP + * transfer will cause substantial disruption. 
+ */ +struct cache_discarder neighbour_discarder __cache_discarder (CACHE_EXPENSIVE)={ + .discard = neighbour_discard, +}; From 43307b4e39300f6602a975bbb710b443e5fcd2b5 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 3 Sep 2013 16:28:34 +0100 Subject: [PATCH 11/18] [ipv4] Abstract out protocol-specific portions of "route" command Signed-off-by: Michael Brown --- src/config/config_route.c | 24 ++++++++++++++++ src/include/usr/route.h | 23 +++++++++++++++- src/usr/route.c | 24 ++++++++-------- src/usr/route_ipv4.c | 58 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 115 insertions(+), 14 deletions(-) create mode 100644 src/config/config_route.c create mode 100644 src/usr/route_ipv4.c diff --git a/src/config/config_route.c b/src/config/config_route.c new file mode 100644 index 00000000..c31d2dae --- /dev/null +++ b/src/config/config_route.c @@ -0,0 +1,24 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. 
+ */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include + +/** @file + * + * Routing management configuration options + * + */ + +/* + * Drag in routing management for relevant protocols + * + */ +#ifdef NET_PROTO_IPV4 +REQUIRE_OBJECT ( route_ipv4 ); +#endif diff --git a/src/include/usr/route.h b/src/include/usr/route.h index 855fa7ba..b914f4b8 100644 --- a/src/include/usr/route.h +++ b/src/include/usr/route.h @@ -3,12 +3,33 @@ /** @file * - * Routing table management + * Routing management * */ FILE_LICENCE ( GPL2_OR_LATER ); +#include + +/** A routing family */ +struct routing_family { + /** + * Print routes for a network device + * + * @v netdev Network device + */ + void ( * print ) ( struct net_device *netdev ); +}; + +/** Routing family table */ +#define ROUTING_FAMILIES __table ( struct routing_family, "routing_families" ) + +/** Declare a routing family */ +#define __routing_family( order ) __table_entry ( ROUTING_FAMILIES, order ) + +#define ROUTING_IPV4 01 +#define ROUTING_IPV6 02 + extern void route ( void ); #endif /* _USR_ROUTE_H */ diff --git a/src/usr/route.c b/src/usr/route.c index e393e38d..ba4cc322 100644 --- a/src/usr/route.c +++ b/src/usr/route.c @@ -19,28 +19,26 @@ FILE_LICENCE ( GPL2_OR_LATER ); -#include #include -#include #include /** @file * - * Routing table management + * Routing management * */ +/** + * Print routing table + * + */ void route ( void ) { - struct ipv4_miniroute *miniroute; + struct net_device *netdev; + struct routing_family *family; - list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) { - printf ( "%s: %s/", miniroute->netdev->name, - inet_ntoa ( miniroute->address ) ); - printf ( "%s", inet_ntoa ( miniroute->netmask ) ); - if ( miniroute->gateway.s_addr ) - printf ( " gw %s", inet_ntoa ( miniroute->gateway ) ); - if ( ! 
netdev_is_open ( miniroute->netdev ) ) - printf ( " (inaccessible)" ); - printf ( "\n" ); + for_each_netdev ( netdev ) { + for_each_table_entry ( family, ROUTING_FAMILIES ) { + family->print ( netdev ); + } } } diff --git a/src/usr/route_ipv4.c b/src/usr/route_ipv4.c new file mode 100644 index 00000000..b4d1b7bf --- /dev/null +++ b/src/usr/route_ipv4.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include +#include +#include +#include + +/** @file + * + * IPv4 routing management + * + */ + +/** + * Print IPv4 routing table + * + * @v netdev Network device + */ +static void route_ipv4_print ( struct net_device *netdev ) { + struct ipv4_miniroute *miniroute; + + list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) { + if ( miniroute->netdev != netdev ) + continue; + printf ( "%s: %s/", netdev->name, + inet_ntoa ( miniroute->address ) ); + printf ( "%s", inet_ntoa ( miniroute->netmask ) ); + if ( miniroute->gateway.s_addr ) + printf ( " gw %s", inet_ntoa ( miniroute->gateway ) ); + if ( ! 
netdev_is_open ( miniroute->netdev ) ) + printf ( " (inaccessible)" ); + printf ( "\n" ); + } +} + +/** IPv4 routing family */ +struct routing_family ipv4_routing_family __routing_family ( ROUTING_IPV4 ) = { + .print = route_ipv4_print, +}; From f7f3087cc542d76f19ba6362b0837dcf1baf86b8 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 26 Aug 2013 14:23:54 +0100 Subject: [PATCH 12/18] [ipv6] Replace IPv6 stack Replace the existing partially-implemented IPv6 stack with a fresh implementation. This implementation is not yet complete. The IPv6 transmit and receive datapaths are functional (including fragment reassembly and parsing of arbitrary extension headers). NDP neighbour solicitations and advertisements are supported. ICMPv6 echo is supported. At present, only link-local addresses may be used, and there is no way to specify an IPv6 address as part of a URI (either directly or via a DNS lookup). Signed-off-by: Michael Brown --- src/config/config.c | 3 + src/config/config_route.c | 3 + src/config/general.h | 1 + src/include/ipxe/icmp6.h | 59 --- src/include/ipxe/icmpv6.h | 78 ++++ src/include/ipxe/in.h | 31 +- src/include/ipxe/ip6.h | 80 ---- src/include/ipxe/ipv6.h | 218 ++++++++++ src/include/ipxe/ndp.h | 93 +++- src/net/icmpv6.c | 266 +++++++----- src/net/ipv4.c | 2 +- src/net/ipv6.c | 877 +++++++++++++++++++++++++++----------- src/net/ndp.c | 484 ++++++++++++++------- src/tests/ipv6_test.c | 115 +++++ src/tests/tests.c | 1 + src/usr/route_ipv6.c | 58 +++ 16 files changed, 1693 insertions(+), 676 deletions(-) delete mode 100644 src/include/ipxe/icmp6.h create mode 100644 src/include/ipxe/icmpv6.h delete mode 100644 src/include/ipxe/ip6.h create mode 100644 src/include/ipxe/ipv6.h create mode 100644 src/tests/ipv6_test.c create mode 100644 src/usr/route_ipv6.c diff --git a/src/config/config.c b/src/config/config.c index f063523c..6596e951 100644 --- a/src/config/config.c +++ b/src/config/config.c @@ -101,6 +101,9 @@ REQUIRE_OBJECT ( debugcon ); #ifdef 
NET_PROTO_IPV4 REQUIRE_OBJECT ( ipv4 ); #endif +#ifdef NET_PROTO_IPV6 +REQUIRE_OBJECT ( ipv6 ); +#endif /* * Drag in all requested PXE support diff --git a/src/config/config_route.c b/src/config/config_route.c index c31d2dae..33e18cdd 100644 --- a/src/config/config_route.c +++ b/src/config/config_route.c @@ -22,3 +22,6 @@ FILE_LICENCE ( GPL2_OR_LATER ); #ifdef NET_PROTO_IPV4 REQUIRE_OBJECT ( route_ipv4 ); #endif +#ifdef NET_PROTO_IPV6 +REQUIRE_OBJECT ( route_ipv6 ); +#endif diff --git a/src/config/general.h b/src/config/general.h index ae14ed3d..2e93efde 100644 --- a/src/config/general.h +++ b/src/config/general.h @@ -40,6 +40,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); */ #define NET_PROTO_IPV4 /* IPv4 protocol */ +#undef NET_PROTO_IPV6 /* IPv6 protocol */ #undef NET_PROTO_FCOE /* Fibre Channel over Ethernet protocol */ /* diff --git a/src/include/ipxe/icmp6.h b/src/include/ipxe/icmp6.h deleted file mode 100644 index 1d433408..00000000 --- a/src/include/ipxe/icmp6.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef _IPXE_ICMP6_H -#define _IPXE_ICMP6_H - -/** @file - * - * ICMP6 protocol - * - */ - -FILE_LICENCE ( GPL2_OR_LATER ); - -#include -#include - -#define ICMP6_NSOLICIT 135 -#define ICMP6_NADVERT 136 - -extern struct tcpip_protocol icmp6_protocol __tcpip_protocol; - -struct icmp6_header { - uint8_t type; - uint8_t code; - uint16_t csum; - /* Message body */ -}; - -struct neighbour_solicit { - uint8_t type; - uint8_t code; - uint16_t csum; - uint32_t reserved; - struct in6_addr target; - /* "Compulsory" options */ - uint8_t opt_type; - uint8_t opt_len; - /* FIXME: hack alert */ - uint8_t opt_ll_addr[6]; -}; - -struct neighbour_advert { - uint8_t type; - uint8_t code; - uint16_t csum; - uint8_t flags; - uint8_t reserved; - struct in6_addr target; - uint8_t opt_type; - uint8_t opt_len; - /* FIXME: hack alert */ - uint8_t opt_ll_addr[6]; -}; - -#define ICMP6_FLAGS_ROUTER 0x80 -#define ICMP6_FLAGS_SOLICITED 0x40 -#define ICMP6_FLAGS_OVERRIDE 0x20 - -int icmp6_send_solicit ( 
struct net_device *netdev, struct in6_addr *src, struct in6_addr *dest ); - -#endif /* _IPXE_ICMP6_H */ diff --git a/src/include/ipxe/icmpv6.h b/src/include/ipxe/icmpv6.h new file mode 100644 index 00000000..c8f0be05 --- /dev/null +++ b/src/include/ipxe/icmpv6.h @@ -0,0 +1,78 @@ +#ifndef _IPXE_ICMP6_H +#define _IPXE_ICMP6_H + +/** @file + * + * ICMPv6 protocol + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include +#include +#include +#include + +/** An ICMPv6 header */ +struct icmpv6_header { + /** Type */ + uint8_t type; + /** Code */ + uint8_t code; + /** Checksum */ + uint16_t chksum; +} __attribute__ (( packed )); + +/** An ICMPv6 echo request/reply */ +struct icmpv6_echo { + /** ICMPv6 header */ + struct icmpv6_header icmp; + /** Identifier */ + uint16_t ident; + /** Sequence number */ + uint16_t sequence; + /** Data */ + uint8_t data[0]; +} __attribute__ (( packed )); + +/** An ICMPv6 handler */ +struct icmpv6_handler { + /** Type */ + unsigned int type; + /** Process received packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v sin6_src Source socket address + * @v sin6_dest Destination socket address + * @ret rc Return status code + * + * This function takes ownership of the I/O buffer. 
+ */ + int ( * rx ) ( struct io_buffer *iobuf, struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + struct sockaddr_in6 *sin6_dest ); +}; + +/** ICMPv6 handler table */ +#define ICMPV6_HANDLERS __table ( struct icmpv6_handler, "icmpv6_handlers" ) + +/** Declare an ICMPv6 handler */ +#define __icmpv6_handler __table_entry ( ICMPV6_HANDLERS, 01 ) + +/** ICMPv6 echo request */ +#define ICMPV6_ECHO_REQUEST 128 + +/** ICMPv6 echo reply */ +#define ICMPV6_ECHO_REPLY 129 + +/** ICMPv6 neighbour solicitation */ +#define ICMPV6_NDP_NEIGHBOUR_SOLICITATION 135 + +/** ICMPv6 neighbour advertisement */ +#define ICMPV6_NDP_NEIGHBOUR_ADVERTISEMENT 136 + +extern struct tcpip_protocol icmpv6_protocol __tcpip_protocol; + +#endif /* _IPXE_ICMP6_H */ diff --git a/src/include/ipxe/in.h b/src/include/ipxe/in.h index eee9159f..a1821b1f 100644 --- a/src/include/ipxe/in.h +++ b/src/include/ipxe/in.h @@ -50,6 +50,13 @@ struct in6_addr { #define s6_addr32 in6_u.u6_addr32 }; +#define IN6_IS_ADDR_MULTICAST( addr ) \ + ( *( ( const uint8_t * ) (addr) ) == 0xff ) + +#define IN6_IS_ADDR_LINKLOCAL( addr ) \ + ( ( *( ( const uint16_t * ) (addr) ) & htons ( 0xffc0 ) ) == \ + htons ( 0xfe80 ) ) + /** * IPv4 socket address */ @@ -90,9 +97,13 @@ struct sockaddr_in6 { uint16_t sin6_flags; /** TCP/IP port (part of struct @c sockaddr_tcpip) */ uint16_t sin6_port; - uint32_t sin6_flowinfo; /* Flow number */ - struct in6_addr sin6_addr; /* 128-bit destination address */ - uint32_t sin6_scope_id; /* Scope ID */ + /** Scope ID + * + * For link-local addresses, this is the network device index.
+ */ + uint16_t sin6_scope_id; + /** IPv6 address */ + struct in6_addr sin6_addr; /** Padding * * This ensures that a struct @c sockaddr_in6 is large @@ -103,20 +114,12 @@ struct sockaddr_in6 { ( sizeof ( sa_family_t ) /* sin6_family */ + sizeof ( uint16_t ) /* sin6_flags */ + sizeof ( uint16_t ) /* sin6_port */ + - sizeof ( uint32_t ) /* sin6_flowinfo */ + - sizeof ( struct in6_addr ) /* sin6_addr */ + - sizeof ( uint32_t ) /* sin6_scope_id */ ) ]; + sizeof ( uint16_t ) /* sin6_scope_id */ + + sizeof ( struct in6_addr ) /* sin6_addr */ ) ]; } __attribute__ (( may_alias )); extern int inet_aton ( const char *cp, struct in_addr *inp ); extern char * inet_ntoa ( struct in_addr in ); - -/* Adding the following for IP6 support - * - -extern int inet6_aton ( const char *cp, struct in6_addr *inp ); -extern char * inet6_ntoa ( struct in_addr in ); - - */ +extern char * inet6_ntoa ( const struct in6_addr *in6 ); #endif /* _IPXE_IN_H */ diff --git a/src/include/ipxe/ip6.h b/src/include/ipxe/ip6.h deleted file mode 100644 index e9584bd6..00000000 --- a/src/include/ipxe/ip6.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef _IPXE_IP6_H -#define _IPXE_IP6_H - -/** @file - * - * IP6 protocol - * - */ - -FILE_LICENCE ( GPL2_OR_LATER ); - -#include -#include -#include -#include - -/* IP6 constants */ - -#define IP6_VERSION 0x6 -#define IP6_HOP_LIMIT 255 - -/** - * I/O buffer contents - * This is duplicated in tcp.h and here. 
Ideally it should go into iobuf.h - */ -#define MAX_HDR_LEN 100 -#define MAX_IOB_LEN 1500 -#define MIN_IOB_LEN MAX_HDR_LEN + 100 /* To account for padding by LL */ - -#define IP6_EQUAL( in6_addr1, in6_addr2 ) \ - ( memcmp ( ( char* ) &( in6_addr1 ), ( char* ) &( in6_addr2 ),\ - sizeof ( struct in6_addr ) ) == 0 ) - -#define IS_UNSPECIFIED( addr ) \ - ( ( (addr).in6_u.u6_addr32[0] == 0x00000000 ) && \ - ( (addr).in6_u.u6_addr32[1] == 0x00000000 ) && \ - ( (addr).in6_u.u6_addr32[2] == 0x00000000 ) && \ - ( (addr).in6_u.u6_addr32[3] == 0x00000000 ) ) -/* IP6 header */ -struct ip6_header { - uint32_t ver_traffic_class_flow_label; - uint16_t payload_len; - uint8_t nxt_hdr; - uint8_t hop_limit; - struct in6_addr src; - struct in6_addr dest; -}; - -/* IP6 pseudo header */ -struct ipv6_pseudo_header { - struct in6_addr src; - struct in6_addr dest; - uint8_t zero_padding; - uint8_t nxt_hdr; - uint16_t len; -}; - -/* Next header numbers */ -#define IP6_HOPBYHOP 0x00 -#define IP6_ROUTING 0x43 -#define IP6_FRAGMENT 0x44 -#define IP6_AUTHENTICATION 0x51 -#define IP6_DEST_OPTS 0x60 -#define IP6_ESP 0x50 -#define IP6_ICMP6 0x58 -#define IP6_NO_HEADER 0x59 - -struct io_buffer; - -extern struct net_protocol ipv6_protocol __net_protocol; -extern struct tcpip_net_protocol ipv6_tcpip_protocol __tcpip_net_protocol; -extern char * inet6_ntoa ( struct in6_addr in6 ); - -extern int add_ipv6_address ( struct net_device *netdev, - struct in6_addr prefix, int prefix_len, - struct in6_addr address, - struct in6_addr gateway ); -extern void del_ipv6_address ( struct net_device *netdev ); - -#endif /* _IPXE_IP6_H */ diff --git a/src/include/ipxe/ipv6.h b/src/include/ipxe/ipv6.h new file mode 100644 index 00000000..f404ba64 --- /dev/null +++ b/src/include/ipxe/ipv6.h @@ -0,0 +1,218 @@ +#ifndef _IPXE_IPV6_H +#define _IPXE_IPV6_H + +/** @file + * + * IPv6 protocol + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include +#include +#include +#include +#include +#include + +/** IPv6 version */ 
+#define IPV6_VER 0x60000000UL + +/** IPv6 version mask */ +#define IPV6_MASK_VER 0xf0000000UL + +/** IPv6 maximum hop limit */ +#define IPV6_HOP_LIMIT 0xff + +/** IPv6 header */ +struct ipv6_header { + /** Version (4 bits), Traffic class (8 bits), Flow label (20 bits) */ + uint32_t ver_tc_label; + /** Payload length, including any extension headers */ + uint16_t len; + /** Next header type */ + uint8_t next_header; + /** Hop limit */ + uint8_t hop_limit; + /** Source address */ + struct in6_addr src; + /** Destination address */ + struct in6_addr dest; +} __attribute__ (( packed )); + +/** IPv6 extension header common fields */ +struct ipv6_extension_header_common { + /** Next header type */ + uint8_t next_header; + /** Header extension length (excluding first 8 bytes) */ + uint8_t len; +} __attribute__ (( packed )); + +/** IPv6 type-length-value options */ +struct ipv6_option { + /** Type */ + uint8_t type; + /** Length */ + uint8_t len; + /** Value */ + uint8_t value[0]; +} __attribute__ (( packed )); + +/** IPv6 option types */ +enum ipv6_option_type { + /** Pad1 */ + IPV6_OPT_PAD1 = 0x00, + /** PadN */ + IPV6_OPT_PADN = 0x01, +}; + +/** Test if IPv6 option can be safely ignored */ +#define IPV6_CAN_IGNORE_OPT( type ) ( ( (type) & 0xc0 ) == 0x00 ) + +/** IPv6 option-based extension header */ +struct ipv6_options_header { + /** Extension header common fields */ + struct ipv6_extension_header_common common; + /** Options */ + struct ipv6_option options[0]; +} __attribute__ (( packed )); + +/** IPv6 routing header */ +struct ipv6_routing_header { + /** Extension header common fields */ + struct ipv6_extension_header_common common; + /** Routing type */ + uint8_t type; + /** Segments left */ + uint8_t remaining; + /** Type-specific data */ + uint8_t data[0]; +} __attribute__ (( packed )); + +/** IPv6 fragment header */ +struct ipv6_fragment_header { + /** Extension header common fields */ + struct ipv6_extension_header_common common; + /** Fragment offset (13 
bits), reserved, more fragments (1 bit) */ + uint16_t offset_more; + /** Identification */ + uint32_t ident; +} __attribute__ (( packed )); + +/** Fragment offset mask */ +#define IPV6_MASK_OFFSET 0xfff8 + +/** More fragments */ +#define IPV6_MASK_MOREFRAGS 0x0001 + +/** IPv6 extension header */ +union ipv6_extension_header { + /** Extension header common fields */ + struct ipv6_extension_header_common common; + /** Minimum size padding */ + uint8_t pad[8]; + /** Generic options header */ + struct ipv6_options_header options; + /** Hop-by-hop options header */ + struct ipv6_options_header hopbyhop; + /** Routing header */ + struct ipv6_routing_header routing; + /** Fragment header */ + struct ipv6_fragment_header fragment; + /** Destination options header */ + struct ipv6_options_header destination; +}; + +/** IPv6 header types */ +enum ipv6_header_type { + /** IPv6 hop-by-hop options header type */ + IPV6_HOPBYHOP = 0, + /** IPv6 routing header type */ + IPV6_ROUTING = 43, + /** IPv6 fragment header type */ + IPV6_FRAGMENT = 44, + /** IPv6 no next header type */ + IPV6_NO_HEADER = 59, + /** IPv6 destination options header type */ + IPV6_DESTINATION = 60, +}; + +/** IPv6 pseudo-header */ +struct ipv6_pseudo_header { + /** Source address */ + struct in6_addr src; + /** Destination address */ + struct in6_addr dest; + /** Upper-layer packet length */ + uint32_t len; + /** Zero padding */ + uint8_t zero[3]; + /** Next header */ + uint8_t next_header; +} __attribute__ (( packed )); + +/** An IPv6 address/routing table entry */ +struct ipv6_miniroute { + /** List of miniroutes */ + struct list_head list; + + /** Network device */ + struct net_device *netdev; + + /** IPv6 address */ + struct in6_addr address; + /** Prefix length */ + unsigned int prefix_len; + /** IPv6 prefix mask (derived from prefix length) */ + struct in6_addr prefix_mask; + /** Router address is present */ + int has_router; + /** Router address */ + struct in6_addr router; +}; + +/** + * Construct 
link-local address (via EUI-64) + * + * @v addr Address to construct + * @v netdev Network device + * @ret prefix_len Prefix length, or negative error + */ +static inline int ipv6_link_local ( struct in6_addr *addr, + struct net_device *netdev ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + const void *ll_addr = netdev->ll_addr; + int rc; + + memset ( addr, 0, sizeof ( *addr ) ); + addr->s6_addr16[0] = htons ( 0xfe80 ); + if ( ( rc = ll_protocol->eui64 ( ll_addr, &addr->s6_addr[8] ) ) != 0 ) + return rc; + addr->s6_addr[8] ^= 0x02; + return 64; +} + +/** + * Construct solicited-node multicast address + * + * @v addr Address to construct + * @v unicast Unicast address + */ +static inline void ipv6_solicited_node ( struct in6_addr *addr, + const struct in6_addr *unicast ) { + + memset ( addr, 0, sizeof ( *addr ) ); + addr->s6_addr16[0] = htons ( 0xff02 ); + addr->s6_addr[11] = 1; + addr->s6_addr[12] = 0xff; + memcpy ( &addr->s6_addr[13], &unicast->s6_addr[13], 3 ); +} + +extern struct list_head ipv6_miniroutes; + +extern struct net_protocol ipv6_protocol __net_protocol; + +extern int ipv6_has_addr ( struct net_device *netdev, struct in6_addr *addr ); + +#endif /* _IPXE_IPV6_H */ diff --git a/src/include/ipxe/ndp.h b/src/include/ipxe/ndp.h index 42bb2fe0..7b98637f 100644 --- a/src/include/ipxe/ndp.h +++ b/src/include/ipxe/ndp.h @@ -1,21 +1,80 @@ +#ifndef _IPXE_NDP_H +#define _IPXE_NDP_H + +/** @file + * + * Neighbour discovery protocol + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + #include -#include -#include -#include -#include #include -#include -#include -#include +#include +#include +#include -#define NDP_STATE_INVALID 0 -#define NDP_STATE_INCOMPLETE 1 -#define NDP_STATE_REACHABLE 2 -#define NDP_STATE_DELAY 3 -#define NDP_STATE_PROBE 4 -#define NDP_STATE_STALE 5 +/** An NDP option */ +struct ndp_option { + /** Type */ + uint8_t type; + /** Length (in blocks of 8 bytes) */ + uint8_t blocks; + /** Value */ + uint8_t value[0]; +} __attribute__ (( 
packed )); -int ndp_resolve ( struct net_device *netdev, struct in6_addr *src, - struct in6_addr *dest, void *dest_ll_addr ); -int ndp_process_advert ( struct io_buffer *iobuf, struct sockaddr_tcpip *st_src, - struct sockaddr_tcpip *st_dest ); +/** NDP option block size */ +#define NDP_OPTION_BLKSZ 8 + +/** An NDP header */ +struct ndp_header { + /** ICMPv6 header */ + struct icmpv6_header icmp; + /** Flags */ + uint8_t flags; + /** Reserved */ + uint8_t reserved[3]; + /** Target address */ + struct in6_addr target; + /** Options */ + struct ndp_option option[0]; +} __attribute__ (( packed )); + +/** NDP router flag */ +#define NDP_ROUTER 0x80 + +/** NDP solicited flag */ +#define NDP_SOLICITED 0x40 + +/** NDP override flag */ +#define NDP_OVERRIDE 0x20 + +/** NDP source link-layer address option */ +#define NDP_OPT_LL_SOURCE 1 + +/** NDP target link-layer address option */ +#define NDP_OPT_LL_TARGET 2 + +extern struct neighbour_discovery ndp_discovery; + +/** + * Transmit packet, determining link-layer address via NDP + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_dest Destination network-layer address + * @v net_source Source network-layer address + * @v ll_source Source link-layer address + * @ret rc Return status code + */ +static inline int ndp_tx ( struct io_buffer *iobuf, struct net_device *netdev, + const void *net_dest, const void *net_source, + const void *ll_source ) { + + return neighbour_tx ( iobuf, netdev, &ipv6_protocol, net_dest, + &ndp_discovery, net_source, ll_source ); +} + +#endif /* _IPXE_NDP_H */ diff --git a/src/net/icmpv6.c b/src/net/icmpv6.c index 72423806..54426be8 100644 --- a/src/net/icmpv6.c +++ b/src/net/icmpv6.c @@ -1,126 +1,172 @@ -#include +/* + * Copyright (C) 2013 Michael Brown . 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + #include -#include #include +#include #include -#include -#include #include -#include -#include #include -#include +#include + +/** @file + * + * ICMPv6 protocol + * + */ /** - * Send neighbour solicitation packet + * Process received ICMPv6 echo request packet * - * @v netdev Network device - * @v src Source address - * @v dest Destination address - * - * This function prepares a neighbour solicitation packet and sends it to the - * network layer. 
+ * @v iobuf I/O buffer + * @v netdev Network device + * @v sin6_src Source socket address + * @v sin6_dest Destination socket address + * @ret rc Return status code */ -int icmp6_send_solicit ( struct net_device *netdev, struct in6_addr *src __unused, - struct in6_addr *dest ) { - union { - struct sockaddr_in6 sin6; - struct sockaddr_tcpip st; - } st_dest; - struct ll_protocol *ll_protocol = netdev->ll_protocol; - struct neighbour_solicit *nsolicit; - struct io_buffer *iobuf = alloc_iob ( sizeof ( *nsolicit ) + MIN_IOB_LEN ); - iob_reserve ( iobuf, MAX_HDR_LEN ); - nsolicit = iob_put ( iobuf, sizeof ( *nsolicit ) ); - - /* Fill up the headers */ - memset ( nsolicit, 0, sizeof ( *nsolicit ) ); - nsolicit->type = ICMP6_NSOLICIT; - nsolicit->code = 0; - nsolicit->target = *dest; - nsolicit->opt_type = 1; - nsolicit->opt_len = ( 2 + ll_protocol->ll_addr_len ) / 8; - memcpy ( nsolicit->opt_ll_addr, netdev->ll_addr, - netdev->ll_protocol->ll_addr_len ); - /* Partial checksum */ - nsolicit->csum = 0; - nsolicit->csum = tcpip_chksum ( nsolicit, sizeof ( *nsolicit ) ); - - /* Solicited multicast address */ - st_dest.sin6.sin6_family = AF_INET6; - st_dest.sin6.sin6_addr.in6_u.u6_addr8[0] = 0xff; - st_dest.sin6.sin6_addr.in6_u.u6_addr8[2] = 0x02; - st_dest.sin6.sin6_addr.in6_u.u6_addr16[1] = 0x0000; - st_dest.sin6.sin6_addr.in6_u.u6_addr32[1] = 0x00000000; - st_dest.sin6.sin6_addr.in6_u.u6_addr16[4] = 0x0000; - st_dest.sin6.sin6_addr.in6_u.u6_addr16[5] = 0x0001; - st_dest.sin6.sin6_addr.in6_u.u6_addr32[3] = dest->in6_u.u6_addr32[3]; - st_dest.sin6.sin6_addr.in6_u.u6_addr8[13] = 0xff; - - /* Send packet over IP6 */ - return tcpip_tx ( iobuf, &icmp6_protocol, NULL, &st_dest.st, - NULL, &nsolicit->csum ); -} - -/** - * Process ICMP6 headers - * - * @v iobuf I/O buffer - * @v st_src Source address - * @v st_dest Destination address - */ -static int icmp6_rx ( struct io_buffer *iobuf, struct net_device *netdev __unused, struct sockaddr_tcpip *st_src, - struct sockaddr_tcpip 
*st_dest, __unused uint16_t pshdr_csum ) { - struct icmp6_header *icmp6hdr = iobuf->data; +static int icmpv6_rx_echo ( struct io_buffer *iobuf, + struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + struct sockaddr_in6 *sin6_dest __unused ) { + struct sockaddr_tcpip *st_src = + ( ( struct sockaddr_tcpip * ) sin6_src ); + struct icmpv6_echo *echo = iobuf->data; + size_t len = iob_len ( iobuf ); + int rc; /* Sanity check */ - if ( iob_len ( iobuf ) < sizeof ( *icmp6hdr ) ) { - DBG ( "Packet too short (%zd bytes)\n", iob_len ( iobuf ) ); - free_iob ( iobuf ); - return -EINVAL; + if ( iob_len ( iobuf ) < sizeof ( *echo ) ) { + DBGC ( netdev, "ICMPv6 echo request too short at %zd bytes " + "(min %zd bytes)\n", iob_len ( iobuf ), + sizeof ( *echo ) ); + rc = -EINVAL; + goto done; + } + DBGC ( netdev, "ICMPv6 echo request from %s (id %#04x seq %#04x)\n", + inet6_ntoa ( &sin6_src->sin6_addr ), ntohs ( echo->ident ), + ntohs ( echo->sequence ) ); + + /* Convert echo request to echo reply and recalculate checksum */ + echo->icmp.type = ICMPV6_ECHO_REPLY; + echo->icmp.chksum = 0; + echo->icmp.chksum = tcpip_chksum ( echo, len ); + + /* Transmit echo reply */ + if ( ( rc = tcpip_tx ( iob_disown ( iobuf ), &icmpv6_protocol, NULL, + st_src, netdev, &echo->icmp.chksum ) ) != 0 ) { + DBGC ( netdev, "ICMPv6 could not transmit reply: %s\n", + strerror ( rc ) ); + goto done; } - /* TODO: Verify checksum */ - - /* Process the ICMP header */ - switch ( icmp6hdr->type ) { - case ICMP6_NADVERT: - return ndp_process_advert ( iobuf, st_src, st_dest ); - } - return -ENOSYS; + done: + free_iob ( iobuf ); + return rc; } -#if 0 -void icmp6_test_nadvert (struct net_device *netdev, struct sockaddr_in6 *server_p, char *ll_addr) { - - struct sockaddr_in6 server; - memcpy ( &server, server_p, sizeof ( server ) ); - struct io_buffer *rxiobuf = alloc_iob ( 500 ); - iob_reserve ( rxiobuf, MAX_HDR_LEN ); - struct neighbour_advert *nadvert = iob_put ( rxiobuf, sizeof ( *nadvert ) ); -
nadvert->type = 136; - nadvert->code = 0; - nadvert->flags = ICMP6_FLAGS_SOLICITED; - nadvert->csum = 0xffff; - nadvert->target = server.sin6_addr; - nadvert->opt_type = 2; - nadvert->opt_len = 1; - memcpy ( nadvert->opt_ll_addr, ll_addr, 6 ); - struct ip6_header *ip6hdr = iob_push ( rxiobuf, sizeof ( *ip6hdr ) ); - ip6hdr->ver_traffic_class_flow_label = htonl ( 0x60000000 ); - ip6hdr->hop_limit = 255; - ip6hdr->nxt_hdr = 58; - ip6hdr->payload_len = htons ( sizeof ( *nadvert ) ); - ip6hdr->src = server.sin6_addr; - ip6hdr->dest = server.sin6_addr; - hex_dump ( rxiobuf->data, iob_len ( rxiobuf ) ); - net_rx ( rxiobuf, netdev, htons ( ETH_P_IPV6 ), ll_addr ); -} -#endif - -/** ICMP6 protocol */ -struct tcpip_protocol icmp6_protocol __tcpip_protocol = { - .name = "ICMP6", - .rx = icmp6_rx, - .tcpip_proto = IP_ICMP6, // 58 +/** ICMPv6 echo request handler */ +struct icmpv6_handler icmpv6_echo_handler __icmpv6_handler = { + .type = ICMPV6_ECHO_REQUEST, + .rx = icmpv6_rx_echo, +}; + +/** + * Identify ICMPv6 handler + * + * @v type ICMPv6 type + * @ret handler ICMPv6 handler, or NULL if not found + */ +static struct icmpv6_handler * icmpv6_handler ( unsigned int type ) { + struct icmpv6_handler *handler; + + for_each_table_entry ( handler, ICMPV6_HANDLERS ) { + if ( handler->type == type ) + return handler; + } + return NULL; +} + +/** + * Process a received packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v st_src Partially-filled source address + * @v st_dest Partially-filled destination address + * @v pshdr_csum Pseudo-header checksum + * @ret rc Return status code + */ +static int icmpv6_rx ( struct io_buffer *iobuf, struct net_device *netdev, + struct sockaddr_tcpip *st_src, + struct sockaddr_tcpip *st_dest, uint16_t pshdr_csum ) { + struct sockaddr_in6 *sin6_src = ( ( struct sockaddr_in6 * ) st_src ); + struct sockaddr_in6 *sin6_dest = ( ( struct sockaddr_in6 * ) st_dest ); + struct icmpv6_header *icmp = iobuf->data; + size_t len = iob_len (
iobuf ); + struct icmpv6_handler *handler; + unsigned int csum; + int rc; + + /* Sanity check */ + if ( len < sizeof ( *icmp ) ) { + DBGC ( netdev, "ICMPv6 packet too short at %zd bytes (min %zd " + "bytes)\n", len, sizeof ( *icmp ) ); + rc = -EINVAL; + goto done; + } + + /* Verify checksum */ + csum = tcpip_continue_chksum ( pshdr_csum, icmp, len ); + if ( csum != 0 ) { + DBGC ( netdev, "ICMPv6 checksum incorrect (is %04x, should be " + "0000)\n", csum ); + DBGC_HDA ( netdev, 0, icmp, len ); + rc = -EINVAL; + goto done; + } + + /* Identify handler */ + handler = icmpv6_handler ( icmp->type ); + if ( ! handler ) { + DBGC ( netdev, "ICMPv6 unrecognised type %d\n", icmp->type ); + rc = -ENOTSUP; + goto done; + } + + /* Pass to handler */ + if ( ( rc = handler->rx ( iob_disown ( iobuf ), netdev, sin6_src, + sin6_dest ) ) != 0 ) { + DBGC ( netdev, "ICMPv6 could not handle type %d: %s\n", + icmp->type, strerror ( rc ) ); + goto done; + } + + done: + free_iob ( iobuf ); + return rc; +} + +/** ICMPv6 TCP/IP protocol */ +struct tcpip_protocol icmpv6_protocol __tcpip_protocol = { + .name = "ICMPv6", + .rx = icmpv6_rx, + .tcpip_proto = IP_ICMP6, }; diff --git a/src/net/ipv4.c b/src/net/ipv4.c index bd318806..46f774e9 100644 --- a/src/net/ipv4.c +++ b/src/net/ipv4.c @@ -290,7 +290,7 @@ static int ipv4_tx ( struct io_buffer *iobuf, DBGC ( sin_dest->sin_addr, "IPv4 could not hash " "multicast %s: %s\n", inet_ntoa ( next_hop ), strerror ( rc ) ); - return rc; + goto err; } ll_dest = ll_dest_buf; } else { diff --git a/src/net/ipv6.c b/src/net/ipv6.c index 077118df..69feba19 100644 --- a/src/net/ipv6.c +++ b/src/net/ipv6.c @@ -1,76 +1,162 @@ -#include +/* + * Copyright (C) 2013 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + #include -#include -#include #include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include #include +#include +#include +#include +#include -/* Unspecified IP6 address */ -static struct in6_addr ip6_none = { - .in6_u.u6_addr32 = { 0,0,0,0 } -}; +/** @file + * + * IPv6 protocol + * + */ -/** An IPv6 routing table entry */ -struct ipv6_miniroute { - /* List of miniroutes */ - struct list_head list; - - /* Network device */ - struct net_device *netdev; - - /* Destination prefix */ - struct in6_addr prefix; - /* Prefix length */ - int prefix_len; - /* IPv6 address of interface */ - struct in6_addr address; - /* Gateway address */ - struct in6_addr gateway; -}; +/* Disambiguate the various error causes */ +#define EINVAL_LEN __einfo_error ( EINFO_EINVAL_LEN ) +#define EINFO_EINVAL_LEN \ + __einfo_uniqify ( EINFO_EINVAL, 0x01, "Invalid length" ) +#define ENOTSUP_VER __einfo_error ( EINFO_ENOTSUP_VER ) +#define EINFO_ENOTSUP_VER \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x01, "Unsupported version" ) +#define ENOTSUP_HDR __einfo_error ( EINFO_ENOTSUP_HDR ) +#define EINFO_ENOTSUP_HDR \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x02, "Unsupported header type" ) +#define ENOTSUP_OPT __einfo_error ( EINFO_ENOTSUP_OPT ) +#define EINFO_ENOTSUP_OPT \ + __einfo_uniqify ( EINFO_ENOTSUP, 0x03, "Unsupported option" ) /** List of IPv6 miniroutes */ -static LIST_HEAD ( miniroutes ); +struct 
list_head ipv6_miniroutes = LIST_HEAD_INIT ( ipv6_miniroutes ); + +/** + * Determine debugging colour for IPv6 debug messages + * + * @v in IPv6 address + * @ret col Debugging colour (for DBGC()) + */ +static uint32_t ipv6col ( struct in6_addr *in ) { + return crc32_le ( 0, in, sizeof ( *in ) ); +} + +/** + * Check if network device has a specific IPv6 address + * + * @v netdev Network device + * @v addr IPv6 address + * @ret has_addr Network device has this IPv6 address + */ +int ipv6_has_addr ( struct net_device *netdev, struct in6_addr *addr ) { + struct ipv6_miniroute *miniroute; + + list_for_each_entry ( miniroute, &ipv6_miniroutes, list ) { + if ( ( miniroute->netdev == netdev ) && + ( memcmp ( &miniroute->address, addr, + sizeof ( miniroute->address ) ) == 0 ) ) { + /* Found matching address */ + return 1; + } + } + return 0; +} + +/** + * Check if IPv6 address is within a routing table entry's local network + * + * @v miniroute Routing table entry + * @v address IPv6 address + * @ret is_local Address is within this entry's local network + */ +static int ipv6_is_local ( struct ipv6_miniroute *miniroute, + struct in6_addr *address ) { + unsigned int i; + + for ( i = 0 ; i < ( sizeof ( address->s6_addr32 ) / + sizeof ( address->s6_addr32[0] ) ) ; i++ ) { + if ( (( address->s6_addr32[i] ^ miniroute->address.s6_addr32[i]) + & miniroute->prefix_mask.s6_addr32[i] ) != 0 ) + return 0; + } + return 1; +} /** * Add IPv6 minirouting table entry * * @v netdev Network device - * @v prefix Destination prefix - * @v address Address of the interface - * @v gateway Gateway address (or ::0 for no gateway) - * @ret miniroute Routing table entry, or NULL + * @v address IPv6 address + * @v prefix_len Prefix length + * @v router Router address (or NULL) + * @ret miniroute Routing table entry, or NULL on failure */ -static struct ipv6_miniroute * __malloc -add_ipv6_miniroute ( struct net_device *netdev, struct in6_addr prefix, - int prefix_len, struct in6_addr address, - struct 
in6_addr gateway ) { +static struct ipv6_miniroute * __malloc +add_ipv6_miniroute ( struct net_device *netdev, struct in6_addr *address, + unsigned int prefix_len, struct in6_addr *router ) { struct ipv6_miniroute *miniroute; - - miniroute = malloc ( sizeof ( *miniroute ) ); - if ( miniroute ) { - /* Record routing information */ - miniroute->netdev = netdev_get ( netdev ); - miniroute->prefix = prefix; - miniroute->prefix_len = prefix_len; - miniroute->address = address; - miniroute->gateway = gateway; - - /* Add miniroute to list of miniroutes */ - if ( !IP6_EQUAL ( gateway, ip6_none ) ) { - list_add_tail ( &miniroute->list, &miniroutes ); - } else { - list_add ( &miniroute->list, &miniroutes ); - } + uint8_t *prefix_mask; + + DBGC ( netdev, "IPv6 add %s/%d ", inet6_ntoa ( address ), prefix_len ); + if ( router ) + DBGC ( netdev, "router %s ", inet6_ntoa ( router ) ); + DBGC ( netdev, "via %s\n", netdev->name ); + + /* Allocate and populate miniroute structure */ + miniroute = zalloc ( sizeof ( *miniroute ) ); + if ( ! miniroute ) + return NULL; + + /* Record routing information */ + miniroute->netdev = netdev_get ( netdev ); + memcpy ( &miniroute->address, address, sizeof ( miniroute->address ) ); + miniroute->prefix_len = prefix_len; + assert ( prefix_len <= ( 8 * sizeof ( miniroute->prefix_mask ) ) ); + for ( prefix_mask = miniroute->prefix_mask.s6_addr ; prefix_len >= 8 ; + prefix_mask++, prefix_len -= 8 ) { + *prefix_mask = 0xff; + } + /* Partial final byte: set its top prefix_len bits explicitly. + * Shifting the byte in place cannot set any bits, since the + * loop above never wrote 0xff to it. + */ + if ( prefix_len ) + *prefix_mask = ( 0xff << ( 8 - prefix_len ) ); + if ( router ) { + miniroute->has_router = 1; + memcpy ( &miniroute->router, router, + sizeof ( miniroute->router ) ); + } + + /* Add to end of list if we have a gateway, otherwise to start + * of list.
+ */ + if ( router ) { + list_add_tail ( &miniroute->list, &ipv6_miniroutes ); + } else { + list_add ( &miniroute->list, &ipv6_miniroutes ); } return miniroute; @@ -82,290 +168,516 @@ add_ipv6_miniroute ( struct net_device *netdev, struct in6_addr prefix, * @v miniroute Routing table entry */ static void del_ipv6_miniroute ( struct ipv6_miniroute *miniroute ) { + struct net_device *netdev = miniroute->netdev; + + DBGC ( netdev, "IPv6 del %s/%d ", inet6_ntoa ( &miniroute->address ), + miniroute->prefix_len ); + if ( miniroute->has_router ) + DBGC ( netdev, "router %s ", inet6_ntoa ( &miniroute->router )); + DBGC ( netdev, "via %s\n", netdev->name ); + netdev_put ( miniroute->netdev ); list_del ( &miniroute->list ); free ( miniroute ); } /** - * Add IPv6 interface + * Perform IPv6 routing * - * @v netdev Network device - * @v prefix Destination prefix - * @v address Address of the interface - * @v gateway Gateway address (or ::0 for no gateway) + * @v scope_id Destination address scope ID (for link-local addresses) + * @v dest Final destination address + * @ret dest Next hop destination address + * @ret miniroute Routing table entry to use, or NULL if no route */ -int add_ipv6_address ( struct net_device *netdev, struct in6_addr prefix, - int prefix_len, struct in6_addr address, - struct in6_addr gateway ) { +static struct ipv6_miniroute * ipv6_route ( unsigned int scope_id, + struct in6_addr **dest ) { struct ipv6_miniroute *miniroute; + int local; - /* Clear any existing address for this net device */ - del_ipv6_address ( netdev ); + /* Find first usable route in routing table */ + list_for_each_entry ( miniroute, &ipv6_miniroutes, list ) { - /* Add new miniroute */ - miniroute = add_ipv6_miniroute ( netdev, prefix, prefix_len, address, - gateway ); - if ( ! miniroute ) - return -ENOMEM; + /* Skip closed network devices */ + if ( ! 
netdev_is_open ( miniroute->netdev ) ) + continue; + /* For link-local addresses, skip devices that are not + * the specified network device. + */ + if ( IN6_IS_ADDR_LINKLOCAL ( *dest ) && + ( miniroute->netdev->index != scope_id ) ) + continue; + + /* Skip non-gateway devices for which the prefix does + * not match. + */ + local = ipv6_is_local ( miniroute, *dest ); + if ( ! ( local || miniroute->has_router ) ) + continue; + + /* Update next hop if applicable */ + if ( ! local ) + *dest = &miniroute->router; + + return miniroute; + } + + return NULL; +} + +/** + * Check that received options can be safely ignored + * + * @v iphdr IPv6 header + * @v options Options extension header + * @v len Maximum length of header + * @ret rc Return status code + */ +static int ipv6_check_options ( struct ipv6_header *iphdr, + struct ipv6_options_header *options, + size_t len ) { + struct ipv6_option *option = options->options; + struct ipv6_option *end = ( ( ( void * ) options ) + len ); + + while ( option < end ) { + if ( ! 
IPV6_CAN_IGNORE_OPT ( option->type ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 unrecognised " + "option type %#02x:\n", option->type ); + DBGC_HDA ( ipv6col ( &iphdr->src ), 0, + options, len ); + return -ENOTSUP_OPT; + } + if ( option->type == IPV6_OPT_PAD1 ) { + option = ( ( ( void * ) option ) + 1 ); + } else { + /* The length field counts the option data only, so + * skip the type and length bytes as well (RFC 2460 + * section 4.2); this also guarantees forward + * progress for zero-length options. + */ + option = ( ( ( void * ) option ) + + sizeof ( option->type ) + + sizeof ( option->len ) + option->len ); + } + } return 0; } /** - * Remove IPv6 interface + * Check if fragment matches fragment reassembly buffer * - * @v netdev Network device + * @v fragment Fragment reassembly buffer + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable portion of I/O buffer + * @ret is_fragment Fragment matches this reassembly buffer */ -void del_ipv6_address ( struct net_device *netdev ) { - struct ipv6_miniroute *miniroute; +static int ipv6_is_fragment ( struct fragment *fragment, + struct io_buffer *iobuf, size_t hdrlen ) { + struct ipv6_header *frag_iphdr = fragment->iobuf->data; + struct ipv6_fragment_header *frag_fhdr = + ( fragment->iobuf->data + fragment->hdrlen - + sizeof ( *frag_fhdr ) ); + struct ipv6_header *iphdr = iobuf->data; + struct ipv6_fragment_header *fhdr = + ( iobuf->data + hdrlen - sizeof ( *fhdr ) ); - list_for_each_entry ( miniroute, &miniroutes, list ) { - if ( miniroute->netdev == netdev ) { - del_ipv6_miniroute ( miniroute ); - break; - } - } + return ( ( memcmp ( &iphdr->src, &frag_iphdr->src, + sizeof ( iphdr->src ) ) == 0 ) && + ( fhdr->ident == frag_fhdr->ident ) ); } /** - * Calculate TCPIP checksum + * Get fragment offset * - * @v iobuf I/O buffer - * @v tcpip TCP/IP protocol - * - * This function constructs the pseudo header and completes the checksum in the - * upper layer header.
+ * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable potion of I/O buffer + * @ret offset Offset */ -static uint16_t ipv6_tx_csum ( struct io_buffer *iobuf, uint16_t csum ) { - struct ip6_header *ip6hdr = iobuf->data; +static size_t ipv6_fragment_offset ( struct io_buffer *iobuf, size_t hdrlen ) { + struct ipv6_fragment_header *fhdr = + ( iobuf->data + hdrlen - sizeof ( *fhdr ) ); + + return ( ntohs ( fhdr->offset_more ) & IPV6_MASK_OFFSET ); +} + +/** + * Check if more fragments exist + * + * @v iobuf I/O buffer + * @v hdrlen Length of non-fragmentable potion of I/O buffer + * @ret more_frags More fragments exist + */ +static int ipv6_more_fragments ( struct io_buffer *iobuf, size_t hdrlen ) { + struct ipv6_fragment_header *fhdr = + ( iobuf->data + hdrlen - sizeof ( *fhdr ) ); + + return ( fhdr->offset_more & htons ( IPV6_MASK_MOREFRAGS ) ); +} + +/** Fragment reassembler */ +static struct fragment_reassembler ipv6_reassembler = { + .list = LIST_HEAD_INIT ( ipv6_reassembler.list ), + .is_fragment = ipv6_is_fragment, + .fragment_offset = ipv6_fragment_offset, + .more_fragments = ipv6_more_fragments, +}; + +/** + * Calculate IPv6 pseudo-header checksum + * + * @v iphdr IPv6 header + * @v len Payload length + * @v next_header Next header type + * @v csum Existing checksum + * @ret csum Updated checksum + */ +static uint16_t ipv6_pshdr_chksum ( struct ipv6_header *iphdr, size_t len, + int next_header, uint16_t csum ) { struct ipv6_pseudo_header pshdr; - /* Calculate pseudo header */ - memset ( &pshdr, 0, sizeof ( pshdr ) ); - pshdr.src = ip6hdr->src; - pshdr.dest = ip6hdr->dest; - pshdr.len = htons ( iob_len ( iobuf ) - sizeof ( *ip6hdr ) ); - pshdr.nxt_hdr = ip6hdr->nxt_hdr; + /* Build pseudo-header */ + memcpy ( &pshdr.src, &iphdr->src, sizeof ( pshdr.src ) ); + memcpy ( &pshdr.dest, &iphdr->dest, sizeof ( pshdr.dest ) ); + pshdr.len = htonl ( len ); + memset ( pshdr.zero, 0, sizeof ( pshdr.zero ) ); + pshdr.next_header = next_header; - /* Update 
checksum value */ + /* Update the checksum value */ return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) ); } /** - * Dump IP6 header for debugging + * Transmit IPv6 packet * - * ip6hdr IPv6 header - */ -void ipv6_dump ( struct ip6_header *ip6hdr ) { - DBG ( "IP6 %p src %s dest %s nxt_hdr %d len %d\n", ip6hdr, - inet6_ntoa ( ip6hdr->src ), inet6_ntoa ( ip6hdr->dest ), - ip6hdr->nxt_hdr, ntohs ( ip6hdr->payload_len ) ); -} - -/** - * Transmit IP6 packet + * @v iobuf I/O buffer + * @v tcpip Transport-layer protocol + * @v st_src Source network-layer address + * @v st_dest Destination network-layer address + * @v netdev Network device to use if no route found, or NULL + * @v trans_csum Transport-layer checksum to complete, or NULL + * @ret rc Status * - * iobuf I/O buffer - * tcpip TCP/IP protocol - * st_dest Destination socket address - * - * This function prepends the IPv6 headers to the payload an transmits it. + * This function expects a transport-layer segment and prepends the + * IPv6 header */ static int ipv6_tx ( struct io_buffer *iobuf, - struct tcpip_protocol *tcpip, - struct sockaddr_tcpip *st_src __unused, + struct tcpip_protocol *tcpip_protocol, + struct sockaddr_tcpip *st_src, struct sockaddr_tcpip *st_dest, struct net_device *netdev, uint16_t *trans_csum ) { - struct sockaddr_in6 *dest = ( struct sockaddr_in6* ) st_dest; - struct in6_addr next_hop; + struct sockaddr_in6 *sin6_src = ( ( struct sockaddr_in6 * ) st_src ); + struct sockaddr_in6 *sin6_dest = ( ( struct sockaddr_in6 * ) st_dest ); struct ipv6_miniroute *miniroute; + struct ipv6_header *iphdr; + struct in6_addr *next_hop; uint8_t ll_dest_buf[MAX_LL_ADDR_LEN]; - const uint8_t *ll_dest = ll_dest_buf; + const void *ll_dest; + size_t len; int rc; - /* Construct the IPv6 packet */ - struct ip6_header *ip6hdr = iob_push ( iobuf, sizeof ( *ip6hdr ) ); - memset ( ip6hdr, 0, sizeof ( *ip6hdr) ); - ip6hdr->ver_traffic_class_flow_label = htonl ( 0x60000000 );//IP6_VERSION; - ip6hdr->payload_len 
= htons ( iob_len ( iobuf ) - sizeof ( *ip6hdr ) ); - ip6hdr->nxt_hdr = tcpip->tcpip_proto; - ip6hdr->hop_limit = IP6_HOP_LIMIT; // 255 + /* Fill up the IPv6 header, except source address */ + len = iob_len ( iobuf ); + iphdr = iob_push ( iobuf, sizeof ( *iphdr ) ); + memset ( iphdr, 0, sizeof ( *iphdr ) ); + iphdr->ver_tc_label = htonl ( IPV6_VER ); + iphdr->len = htons ( len ); + iphdr->next_header = tcpip_protocol->tcpip_proto; + iphdr->hop_limit = IPV6_HOP_LIMIT; + memcpy ( &iphdr->dest, &sin6_dest->sin6_addr, sizeof ( iphdr->dest ) ); - /* Determine the next hop address and interface - * - * TODO: Implement the routing table. - */ - next_hop = dest->sin6_addr; - list_for_each_entry ( miniroute, &miniroutes, list ) { - if ( ( memcmp ( &ip6hdr->dest, &miniroute->prefix, - miniroute->prefix_len ) == 0 ) || - ( IP6_EQUAL ( miniroute->gateway, ip6_none ) ) ) { - netdev = miniroute->netdev; - ip6hdr->src = miniroute->address; - if ( ! ( IS_UNSPECIFIED ( miniroute->gateway ) ) ) { - next_hop = miniroute->gateway; - } - break; - } + /* Use routing table to identify next hop and transmitting netdev */ + next_hop = &iphdr->dest; + if ( sin6_src ) { + memcpy ( &iphdr->src, &sin6_src->sin6_addr, + sizeof ( iphdr->src ) ); } - /* No network interface identified */ - if ( !netdev ) { - DBG ( "No route to host %s\n", inet6_ntoa ( ip6hdr->dest ) ); + if ( ( ! IN6_IS_ADDR_MULTICAST ( next_hop ) ) && + ( ( miniroute = ipv6_route ( ntohl ( sin6_dest->sin6_scope_id ), + &next_hop ) ) != NULL ) ) { + memcpy ( &iphdr->src, &miniroute->address, + sizeof ( iphdr->src ) ); + netdev = miniroute->netdev; + } + if ( ! 
netdev ) { + DBGC ( ipv6col ( &iphdr->dest ), "IPv6 has no route to %s\n", + inet6_ntoa ( &iphdr->dest ) ); rc = -ENETUNREACH; goto err; } - /* Complete the transport layer checksum */ - if ( trans_csum ) - *trans_csum = ipv6_tx_csum ( iobuf, *trans_csum ); + /* Fix up checksums */ + if ( trans_csum ) { + *trans_csum = ipv6_pshdr_chksum ( iphdr, len, + tcpip_protocol->tcpip_proto, + *trans_csum ); + } - /* Print IPv6 header */ - ipv6_dump ( ip6hdr ); - - /* Resolve link layer address */ - if ( next_hop.in6_u.u6_addr8[0] == 0xff ) { - ll_dest_buf[0] = 0x33; - ll_dest_buf[1] = 0x33; - ll_dest_buf[2] = next_hop.in6_u.u6_addr8[12]; - ll_dest_buf[3] = next_hop.in6_u.u6_addr8[13]; - ll_dest_buf[4] = next_hop.in6_u.u6_addr8[14]; - ll_dest_buf[5] = next_hop.in6_u.u6_addr8[15]; - } else { - /* Unicast address needs to be resolved by NDP */ - if ( ( rc = ndp_resolve ( netdev, &next_hop, &ip6hdr->src, - ll_dest_buf ) ) != 0 ) { - DBG ( "No entry for %s\n", inet6_ntoa ( next_hop ) ); + /* Print IPv6 header for debugging */ + DBGC2 ( ipv6col ( &iphdr->dest ), "IPv6 TX %s->", + inet6_ntoa ( &iphdr->src ) ); + DBGC2 ( ipv6col ( &iphdr->dest ), "%s len %zd next %d\n", + inet6_ntoa ( &iphdr->dest ), len, iphdr->next_header ); + + /* Calculate link-layer destination address, if possible */ + if ( IN6_IS_ADDR_MULTICAST ( next_hop ) ) { + /* Multicast address */ + if ( ( rc = netdev->ll_protocol->mc_hash ( AF_INET6, next_hop, + ll_dest_buf ) ) !=0){ + DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not hash " + "multicast %s: %s\n", inet6_ntoa ( next_hop ), + strerror ( rc ) ); goto err; } + ll_dest = ll_dest_buf; + } else { + /* Unicast address */ + ll_dest = NULL; + } + + /* Hand off to link layer (via NDP if applicable) */ + if ( ll_dest ) { + if ( ( rc = net_tx ( iobuf, netdev, &ipv6_protocol, ll_dest, + netdev->ll_addr ) ) != 0 ) { + DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not " + "transmit packet via %s: %s\n", + netdev->name, strerror ( rc ) ); + return rc; + } + } else { + 
if ( ( rc = ndp_tx ( iobuf, netdev, next_hop, &iphdr->src, + netdev->ll_addr ) ) != 0 ) { + DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not " + "transmit packet via %s: %s\n", + netdev->name, strerror ( rc ) ); + return rc; + } } - /* Transmit packet */ - return net_tx ( iobuf, netdev, &ipv6_protocol, ll_dest, - netdev->ll_addr ); + return 0; - err: + err: free_iob ( iobuf ); return rc; } /** - * Process next IP6 header - * - * @v iobuf I/O buffer - * @v nxt_hdr Next header number - * @v src Source socket address - * @v dest Destination socket address - * - * Refer http://www.iana.org/assignments/ipv6-parameters for the numbers - */ -static int ipv6_process_nxt_hdr ( struct io_buffer *iobuf, - struct net_device *netdev, uint8_t nxt_hdr, - struct sockaddr_tcpip *src, struct sockaddr_tcpip *dest ) { - switch ( nxt_hdr ) { - case IP6_HOPBYHOP: - case IP6_ROUTING: - case IP6_FRAGMENT: - case IP6_AUTHENTICATION: - case IP6_DEST_OPTS: - case IP6_ESP: - DBG ( "Function not implemented for header %d\n", nxt_hdr ); - return -ENOSYS; - case IP6_ICMP6: - break; - case IP6_NO_HEADER: - DBG ( "No next header\n" ); - return 0; - } - /* Next header is not a IPv6 extension header */ - return tcpip_rx ( iobuf, netdev, nxt_hdr, src, dest, 0 /* fixme */ ); -} - -/** - * Process incoming IP6 packets + * Process incoming IPv6 packets * * @v iobuf I/O buffer * @v netdev Network device * @v ll_dest Link-layer destination address - * @v ll_source Link-layer source address + * @v ll_source Link-layer destination source * @v flags Packet flags + * @ret rc Return status code * - * This function processes a IPv6 packet + * This function expects an IPv6 network datagram. It processes the + * headers and sends it to the transport layer. 
*/ -static int ipv6_rx ( struct io_buffer *iobuf, - __unused struct net_device *netdev, - __unused const void *ll_dest, - __unused const void *ll_source, - __unused unsigned int flags ) { - - struct ip6_header *ip6hdr = iobuf->data; +static int ipv6_rx ( struct io_buffer *iobuf, struct net_device *netdev, + const void *ll_dest __unused, + const void *ll_source __unused, + unsigned int flags ) { + struct ipv6_header *iphdr = iobuf->data; + union ipv6_extension_header *ext; union { struct sockaddr_in6 sin6; struct sockaddr_tcpip st; } src, dest; + uint16_t pshdr_csum; + size_t len; + size_t hdrlen; + size_t extlen; + int this_header; + int next_header; + int rc; - /* Sanity check */ - if ( iob_len ( iobuf ) < sizeof ( *ip6hdr ) ) { - DBG ( "Packet too short (%zd bytes)\n", iob_len ( iobuf ) ); - goto drop; + /* Sanity check the IPv6 header */ + if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 packet too short at %zd " + "bytes (min %zd bytes)\n", iob_len ( iobuf ), + sizeof ( *iphdr ) ); + rc = -EINVAL_LEN; + goto err; + } + if ( ( iphdr->ver_tc_label & htonl ( IPV6_MASK_VER ) ) != + htonl ( IPV6_VER ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 version %#08x not " + "supported\n", ntohl ( iphdr->ver_tc_label ) ); + rc = -ENOTSUP_VER; + goto err; } - /* TODO: Verify checksum */ + /* Truncate packet to specified length. The payload length + * excludes the IPv6 header, so compare against the buffer + * length minus the header; otherwise the iob_unput() length + * below could underflow. + */ + len = ntohs ( iphdr->len ); + if ( len > ( iob_len ( iobuf ) - sizeof ( *iphdr ) ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 length too long at %zd " + "bytes (payload is %zd bytes)\n", len, + ( iob_len ( iobuf ) - sizeof ( *iphdr ) ) ); + rc = -EINVAL_LEN; + goto err; + } + iob_unput ( iobuf, ( iob_len ( iobuf ) - len - sizeof ( *iphdr ) ) ); + hdrlen = sizeof ( *iphdr ); - /* Print IP6 header for debugging */ - ipv6_dump ( ip6hdr ); + /* Print IPv6 header for debugging */ + DBGC2 ( ipv6col ( &iphdr->src ), "IPv6 RX %s<-", + inet6_ntoa ( &iphdr->dest ) ); + DBGC2 ( ipv6col ( &iphdr->src ), "%s len %zd next %d\n", + inet6_ntoa ( &iphdr->src ), len,
iphdr->next_header ); - /* Check header version */ - if ( ( ip6hdr->ver_traffic_class_flow_label & 0xf0000000 ) != 0x60000000 ) { - DBG ( "Invalid protocol version\n" ); - goto drop; + /* Discard unicast packets not destined for us */ + if ( ( ! ( flags & LL_MULTICAST ) ) && + ( ! ipv6_has_addr ( netdev, &iphdr->dest ) ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 discarding non-local " + "unicast packet for %s\n", inet6_ntoa ( &iphdr->dest ) ); + rc = -EPIPE; + goto err; } - /* Check the payload length */ - if ( ntohs ( ip6hdr->payload_len ) > iob_len ( iobuf ) ) { - DBG ( "Inconsistent packet length (%d bytes)\n", - ip6hdr->payload_len ); - goto drop; + /* Process any extension headers */ + next_header = iphdr->next_header; + while ( 1 ) { + + /* Extract extension header */ + this_header = next_header; + ext = ( iobuf->data + hdrlen ); + extlen = sizeof ( ext->pad ); + if ( iob_len ( iobuf ) < ( hdrlen + extlen ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 too short for " + "extension header type %d at %zd bytes (min " + "%zd bytes)\n", this_header, + ( iob_len ( iobuf ) - hdrlen ), extlen ); + rc = -EINVAL_LEN; + goto err; + } + + /* Determine size of extension header (if applicable) */ + if ( ( this_header == IPV6_HOPBYHOP ) || + ( this_header == IPV6_DESTINATION ) || + ( this_header == IPV6_ROUTING ) ) { + /* Length field is present, in units of 8 octets + * beyond the first 8 octets (RFC2460). + * NOTE(review): confirm against the header + * structure definition. + */ + extlen += ( ext->common.len * 8 ); + } else if ( this_header == IPV6_FRAGMENT ) { + /* Length field is reserved and ignored (RFC2460) */ + } else { + /* Not an extension header; assume rest is payload */ + break; + } + if ( iob_len ( iobuf ) < ( hdrlen + extlen ) ) { + DBGC ( ipv6col ( &iphdr->src ), "IPv6 too short for " + "extension header type %d at %zd bytes (min " + "%zd bytes)\n", this_header, + ( iob_len ( iobuf ) - hdrlen ), extlen ); + rc = -EINVAL_LEN; + goto err; + } + hdrlen += extlen; + next_header = ext->common.next_header; + DBGC2 ( ipv6col ( &iphdr->src ), "IPv6 RX %s<-", + inet6_ntoa ( &iphdr->dest ) ); + DBGC2 (
ipv6col ( &iphdr->src ), "%s ext type %d len %zd next " + "%d\n", inet6_ntoa ( &iphdr->src ), this_header, + extlen, next_header ); + + /* Process this extension header */ + if ( ( this_header == IPV6_HOPBYHOP ) || + ( this_header == IPV6_DESTINATION ) ) { + + /* Check that all options can be ignored */ + if ( ( rc = ipv6_check_options ( iphdr, &ext->options, + extlen ) ) != 0 ) + goto err; + + } else if ( this_header == IPV6_FRAGMENT ) { + + /* Reassemble fragments */ + iobuf = fragment_reassemble ( &ipv6_reassembler, iobuf, + &hdrlen ); + if ( ! iobuf ) + return 0; + iphdr = iobuf->data; + } } - /* Ignore the traffic class and flow control values */ - - /* Construct socket address */ + /* Construct socket address, calculate pseudo-header checksum, + * and hand off to transport layer + */ memset ( &src, 0, sizeof ( src ) ); src.sin6.sin6_family = AF_INET6; - src.sin6.sin6_addr = ip6hdr->src; + memcpy ( &src.sin6.sin6_addr, &iphdr->src, + sizeof ( src.sin6.sin6_addr ) ); + src.sin6.sin6_scope_id = htonl ( netdev->index ); memset ( &dest, 0, sizeof ( dest ) ); dest.sin6.sin6_family = AF_INET6; - dest.sin6.sin6_addr = ip6hdr->dest; + memcpy ( &dest.sin6.sin6_addr, &iphdr->dest, + sizeof ( dest.sin6.sin6_addr ) ); + dest.sin6.sin6_scope_id = htonl ( netdev->index ); + iob_pull ( iobuf, hdrlen ); + pshdr_csum = ipv6_pshdr_chksum ( iphdr, iob_len ( iobuf ), + next_header, TCPIP_EMPTY_CSUM ); + if ( ( rc = tcpip_rx ( iobuf, netdev, next_header, &src.st, &dest.st, + pshdr_csum ) ) != 0 ) { + DBGC ( ipv6col ( &src.sin6.sin6_addr ), "IPv6 received packet " + "rejected by stack: %s\n", strerror ( rc ) ); + return rc; + } - /* Strip header */ - iob_unput ( iobuf, iob_len ( iobuf ) - ntohs ( ip6hdr->payload_len ) - - sizeof ( *ip6hdr ) ); - iob_pull ( iobuf, sizeof ( *ip6hdr ) ); + return 0; - /* Send it to the transport layer */ - return ipv6_process_nxt_hdr ( iobuf, netdev, ip6hdr->nxt_hdr, &src.st, &dest.st ); - - drop: - DBG ( "Packet dropped\n" ); + err: free_iob ( iobuf 
); - return -1; + return rc; } /** - * Print a IP6 address as xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx + * Convert IPv6 address to standard notation + * + * @v in IPv6 address + * @ret string IPv6 address in standard notation + * + * RFC5952 defines the canonical format for IPv6 textual representation. */ -char * inet6_ntoa ( struct in6_addr in6 ) { - static char buf[40]; - uint16_t *bytes = ( uint16_t* ) &in6; - sprintf ( buf, "%x:%x:%x:%x:%x:%x:%x:%x", bytes[0], bytes[1], bytes[2], - bytes[3], bytes[4], bytes[5], bytes[6], bytes[7] ); - return buf; +char * inet6_ntoa ( const struct in6_addr *in ) { + static char buf[41]; /* ":xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx" */ + char *out = buf; + char *longest_start = NULL; + char *start = NULL; + int longest_len = 1; + int len = 0; + char *dest; + unsigned int i; + uint16_t value; + + /* Format address, keeping track of longest run of zeros */ + for ( i = 0 ; i < ( sizeof ( in->s6_addr16 ) / + sizeof ( in->s6_addr16[0] ) ) ; i++ ) { + value = ntohs ( in->s6_addr16[i] ); + if ( value == 0 ) { + if ( len++ == 0 ) + start = out; + if ( len > longest_len ) { + longest_start = start; + longest_len = len; + } + } else { + len = 0; + } + out += sprintf ( out, ":%x", value ); + } + + /* Abbreviate longest run of zeros, if applicable */ + if ( longest_start ) { + dest = strcpy ( ( longest_start + 1 ), + ( longest_start + ( 2 * longest_len ) ) ); + if ( dest[0] == '\0' ) + dest[1] = '\0'; + dest[0] = ':'; + } + return ( ( longest_start == buf ) ? 
buf : ( buf + 1 ) ); } +/** + * Transcribe IPv6 address + * + * @v net_addr IPv6 address + * @ret string IPv6 address in standard notation + * + */ static const char * ipv6_ntoa ( const void *net_addr ) { - return inet6_ntoa ( * ( ( struct in6_addr * ) net_addr ) ); + return inet6_ntoa ( net_addr ); } /** IPv6 protocol */ @@ -383,3 +695,72 @@ struct tcpip_net_protocol ipv6_tcpip_protocol __tcpip_net_protocol = { .sa_family = AF_INET6, .tx = ipv6_tx, }; + +/** + * Create IPv6 network device + * + * @v netdev Network device + * @ret rc Return status code + */ +static int ipv6_probe ( struct net_device *netdev ) { + struct ipv6_miniroute *miniroute; + struct in6_addr address; + int prefix_len; + int rc; + + /* Construct link-local address from EUI-64 as per RFC 2464 */ + prefix_len = ipv6_link_local ( &address, netdev ); + if ( prefix_len < 0 ) { + rc = prefix_len; + DBGC ( netdev, "IPv6 %s could not construct link-local " + "address: %s\n", netdev->name, strerror ( rc ) ); + return rc; + } + + /* Create link-local address for this network device */ + miniroute = add_ipv6_miniroute ( netdev, &address, prefix_len, NULL ); + if ( ! 
miniroute ) + return -ENOMEM; + + return 0; +} + +/** + * Handle IPv6 network device or link state change + * + * @v netdev Network device + */ +static void ipv6_notify ( struct net_device *netdev __unused ) { + + /* Nothing to do */ +} + +/** + * Destroy IPv6 network device + * + * @v netdev Network device + */ +static void ipv6_remove ( struct net_device *netdev ) { + struct ipv6_miniroute *miniroute; + struct ipv6_miniroute *tmp; + + /* Delete all miniroutes for this network device */ + list_for_each_entry_safe ( miniroute, tmp, &ipv6_miniroutes, list ) { + if ( miniroute->netdev == netdev ) + del_ipv6_miniroute ( miniroute ); + } +} + +/** IPv6 network device driver */ +struct net_driver ipv6_driver __net_driver = { + .name = "IPv6", + .probe = ipv6_probe, + .notify = ipv6_notify, + .remove = ipv6_remove, +}; + +/* Drag in ICMPv6 */ +REQUIRE_OBJECT ( icmpv6 ); + +/* Drag in NDP */ +REQUIRE_OBJECT ( ndp ); diff --git a/src/net/ndp.c b/src/net/ndp.c index 4d371335..48b16d02 100644 --- a/src/net/ndp.c +++ b/src/net/ndp.c @@ -1,180 +1,370 @@ -#include +/* + * Copyright (C) 2013 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +FILE_LICENCE ( GPL2_OR_LATER ); + #include -#include #include -#include +#include +#include #include +#include +#include +#include +#include #include -#include -#include -#include /** @file * - * Neighbour Discovery Protocol + * IPv6 neighbour discovery protocol * - * This file implements address resolution as specified by the neighbour - * discovery protocol in RFC2461. This protocol is part of the IPv6 protocol - * family. */ -/* A neighbour entry */ -struct ndp_entry { - /** Target IP6 address */ - struct in6_addr in6; - /** Link layer protocol */ - struct ll_protocol *ll_protocol; - /** Link-layer address */ - uint8_t ll_addr[MAX_LL_ADDR_LEN]; - /** State of the neighbour entry */ - int state; -}; - -/** Number of entries in the neighbour cache table */ -#define NUM_NDP_ENTRIES 4 - -/** The neighbour cache table */ -static struct ndp_entry ndp_table[NUM_NDP_ENTRIES]; -#define ndp_table_end &ndp_table[NUM_NDP_ENTRIES] - -static unsigned int next_new_ndp_entry = 0; - /** - * Find entry in the neighbour cache - * - * @v in6 IP6 address - */ -static struct ndp_entry * -ndp_find_entry ( struct in6_addr *in6 ) { - struct ndp_entry *ndp; - - for ( ndp = ndp_table ; ndp < ndp_table_end ; ndp++ ) { - if ( IP6_EQUAL ( ( *in6 ), ndp->in6 ) && - ( ndp->state != NDP_STATE_INVALID ) ) { - return ndp; - } - } - return NULL; -} - -/** - * Add NDP entry - * - * @v netdev Network device - * @v in6 IP6 address - * @v ll_addr Link-layer address - * @v state State of the entry - one of the NDP_STATE_XXX values - */ -static void -add_ndp_entry ( struct net_device *netdev, struct in6_addr *in6, - void *ll_addr, int state ) { - struct ndp_entry *ndp; - ndp = &ndp_table[next_new_ndp_entry++ % NUM_NDP_ENTRIES]; - - /* Fill up entry */ - ndp->ll_protocol = netdev->ll_protocol; - memcpy ( &ndp->in6, &( *in6 ), sizeof ( *in6 ) ); - if ( ll_addr ) { - memcpy ( ndp->ll_addr, ll_addr, netdev->ll_protocol->ll_addr_len ); - } else { - memset ( ndp->ll_addr, 0, 
netdev->ll_protocol->ll_addr_len ); - } - ndp->state = state; - DBG ( "New neighbour cache entry: IP6 %s => %s %s\n", - inet6_ntoa ( ndp->in6 ), netdev->ll_protocol->name, - netdev->ll_protocol->ntoa ( ndp->ll_addr ) ); -} - -/** - * Resolve the link-layer address + * Transmit NDP neighbour solicitation/advertisement packet * * @v netdev Network device - * @v dest Destination address - * @v src Source address - * @ret dest_ll_addr Destination link-layer address or NULL - * @ret rc Status - * - * This function looks up the neighbour cache for an entry corresponding to the - * destination address. If it finds a valid entry, it fills up dest_ll_addr and - * returns 0. Otherwise it sends a neighbour solicitation to the solicited - * multicast address. + * @v sin6_src Source socket address + * @v sin6_dest Destination socket address + * @v target Neighbour target address + * @v icmp_type ICMPv6 type + * @v flags NDP flags + * @v option_type NDP option type + * @ret rc Return status code */ -int ndp_resolve ( struct net_device *netdev, struct in6_addr *dest, - struct in6_addr *src, void *dest_ll_addr ) { +static int ndp_tx_neighbour ( struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + struct sockaddr_in6 *sin6_dest, + const struct in6_addr *target, + unsigned int icmp_type, + unsigned int flags, + unsigned int option_type ) { + struct sockaddr_tcpip *st_src = + ( ( struct sockaddr_tcpip * ) sin6_src ); + struct sockaddr_tcpip *st_dest = + ( ( struct sockaddr_tcpip * ) sin6_dest ); struct ll_protocol *ll_protocol = netdev->ll_protocol; - struct ndp_entry *ndp; + struct io_buffer *iobuf; + struct ndp_header *ndp; + size_t option_len; + size_t len; int rc; - ndp = ndp_find_entry ( dest ); - /* Check if the entry is valid */ - if ( ndp && ndp->state == NDP_STATE_REACHABLE ) { - DBG ( "Neighbour cache hit: IP6 %s => %s %s\n", - inet6_ntoa ( *dest ), ll_protocol->name, - ll_protocol->ntoa ( ndp->ll_addr ) ); - memcpy ( dest_ll_addr, ndp->ll_addr, 
ll_protocol->ll_addr_len ); - return 0; - } + /* Allocate and populate buffer */ + option_len = ( ( sizeof ( ndp->option[0] ) + ll_protocol->ll_addr_len + + NDP_OPTION_BLKSZ - 1 ) & + ~( NDP_OPTION_BLKSZ - 1 ) ); + len = ( sizeof ( *ndp ) + option_len ); + iobuf = alloc_iob ( MAX_LL_NET_HEADER_LEN + len ); + if ( ! iobuf ) + return -ENOMEM; + iob_reserve ( iobuf, MAX_LL_NET_HEADER_LEN ); + ndp = iob_put ( iobuf, len ); + memset ( ndp, 0, len ); + ndp->icmp.type = icmp_type; + ndp->flags = flags; + memcpy ( &ndp->target, target, sizeof ( ndp->target ) ); + ndp->option[0].type = option_type; + ndp->option[0].blocks = ( option_len / NDP_OPTION_BLKSZ ); + memcpy ( ndp->option[0].value, netdev->ll_addr, + ll_protocol->ll_addr_len ); + ndp->icmp.chksum = tcpip_chksum ( ndp, len ); - /* Check if the entry was already created */ - if ( ndp ) { - DBG ( "Awaiting neighbour advertisement\n" ); - /* For test */ -// ndp->state = NDP_STATE_REACHABLE; -// memcpy ( ndp->ll_addr, netdev->ll_addr, 6 ); -// assert ( ndp->ll_protocol->ll_addr_len == 6 ); -// icmp6_test_nadvert ( netdev, dest, ndp->ll_addr ); -// assert ( ndp->state == NDP_STATE_REACHABLE ); - /* Take it out till here */ - return -ENOENT; - } - DBG ( "Neighbour cache miss: IP6 %s\n", inet6_ntoa ( *dest ) ); - - /* Add entry in the neighbour cache */ - add_ndp_entry ( netdev, dest, NULL, NDP_STATE_INCOMPLETE ); - - /* Send neighbour solicitation */ - if ( ( rc = icmp6_send_solicit ( netdev, src, dest ) ) != 0 ) { + /* Transmit packet */ + if ( ( rc = tcpip_tx ( iobuf, &icmpv6_protocol, st_src, st_dest, + netdev, &ndp->icmp.chksum ) ) != 0 ) { + DBGC ( netdev, "NDP could not transmit packet: %s\n", + strerror ( rc ) ); return rc; } - return -ENOENT; + + return 0; } /** - * Process neighbour advertisement + * Transmit NDP neighbour discovery request * - * @v iobuf I/O buffer - * @v st_src Source address - * @v st_dest Destination address + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v 
net_dest Destination network-layer address + * @v net_source Source network-layer address + * @ret rc Return status code */ -int ndp_process_advert ( struct io_buffer *iobuf, struct sockaddr_tcpip *st_src __unused, - struct sockaddr_tcpip *st_dest __unused ) { - struct neighbour_advert *nadvert = iobuf->data; - struct ndp_entry *ndp; +static int ndp_tx_request ( struct net_device *netdev, + struct net_protocol *net_protocol __unused, + const void *net_dest, const void *net_source ) { + struct sockaddr_in6 sin6_src; + struct sockaddr_in6 sin6_dest; + + /* Construct source address */ + memset ( &sin6_src, 0, sizeof ( sin6_src ) ); + sin6_src.sin6_family = AF_INET6; + memcpy ( &sin6_src.sin6_addr, net_source, + sizeof ( sin6_src.sin6_addr ) ); + sin6_src.sin6_scope_id = htons ( netdev->index ); + + /* Construct multicast destination address */ + memset ( &sin6_dest, 0, sizeof ( sin6_dest ) ); + sin6_dest.sin6_family = AF_INET6; + sin6_dest.sin6_scope_id = htons ( netdev->index ); + ipv6_solicited_node ( &sin6_dest.sin6_addr, net_dest ); + + /* Transmit neighbour discovery packet */ + return ndp_tx_neighbour ( netdev, &sin6_src, &sin6_dest, net_dest, + ICMPV6_NDP_NEIGHBOUR_SOLICITATION, 0, + NDP_OPT_LL_SOURCE ); +} + +/** NDP neighbour discovery protocol */ +struct neighbour_discovery ndp_discovery = { + .name = "NDP", + .tx_request = ndp_tx_request, +}; + +/** + * Process NDP neighbour solicitation source link-layer address option + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v ndp NDP packet + * @v ll_addr Source link-layer address + * @v ll_addr_len Source link-layer address length + * @ret rc Return status code + */ +static int ndp_rx_neighbour_solicitation ( struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + struct ndp_header *ndp __unused, + const void *ll_addr, + size_t ll_addr_len ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + int rc; + + /* Silently ignore neighbour solicitations for addresses we do + * 
not own. + */ + if ( ! ipv6_has_addr ( netdev, &ndp->target ) ) + return 0; /* Sanity check */ - if ( iob_len ( iobuf ) < sizeof ( *nadvert ) ) { - DBG ( "Packet too short (%zd bytes)\n", iob_len ( iobuf ) ); + if ( ll_addr_len < ll_protocol->ll_addr_len ) { + DBGC ( netdev, "NDP neighbour solicitation link-layer address " + "too short at %zd bytes (min %d bytes)\n", + ll_addr_len, ll_protocol->ll_addr_len ); return -EINVAL; } - assert ( nadvert->code == 0 ); - assert ( nadvert->flags & ICMP6_FLAGS_SOLICITED ); - assert ( nadvert->opt_type == 2 ); - - /* Update the neighbour cache, if entry is present */ - ndp = ndp_find_entry ( &nadvert->target ); - if ( ndp ) { - - assert ( nadvert->opt_len == - ( ( 2 + ndp->ll_protocol->ll_addr_len ) / 8 ) ); - - if ( IP6_EQUAL ( ndp->in6, nadvert->target ) ) { - memcpy ( ndp->ll_addr, nadvert->opt_ll_addr, - ndp->ll_protocol->ll_addr_len ); - ndp->state = NDP_STATE_REACHABLE; - return 0; - } + /* Create or update neighbour cache entry */ + if ( ( rc = neighbour_define ( netdev, &ipv6_protocol, + &sin6_src->sin6_addr, + ll_addr ) ) != 0 ) { + DBGC ( netdev, "NDP could not define %s => %s: %s\n", + inet6_ntoa ( &sin6_src->sin6_addr ), + ll_protocol->ntoa ( ll_addr ), strerror ( rc ) ); + return rc; } - DBG ( "Unsolicited advertisement (dropping packet)\n" ); + + /* Send neighbour advertisement */ + if ( ( rc = ndp_tx_neighbour ( netdev, NULL, sin6_src, &ndp->target, + ICMPV6_NDP_NEIGHBOUR_ADVERTISEMENT, + ( NDP_SOLICITED | NDP_OVERRIDE ), + NDP_OPT_LL_TARGET ) ) != 0 ) { + return rc; + } + return 0; } + +/** + * Process NDP neighbour advertisement target link-layer address option + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v ndp NDP packet + * @v ll_addr Target link-layer address + * @v ll_addr_len Target link-layer address length + * @ret rc Return status code + */ +static int +ndp_rx_neighbour_advertisement ( struct net_device *netdev, + struct sockaddr_in6 *sin6_src __unused, + struct ndp_header 
*ndp, const void *ll_addr, + size_t ll_addr_len ) { + struct ll_protocol *ll_protocol = netdev->ll_protocol; + int rc; + + /* Sanity check */ + if ( ll_addr_len < ll_protocol->ll_addr_len ) { + DBGC ( netdev, "NDP neighbour advertisement link-layer address " + "too short at %zd bytes (min %d bytes)\n", + ll_addr_len, ll_protocol->ll_addr_len ); + return -EINVAL; + } + + /* Update neighbour cache entry, if any */ + if ( ( rc = neighbour_update ( netdev, &ipv6_protocol, &ndp->target, + ll_addr ) ) != 0 ) { + DBGC ( netdev, "NDP could not update %s => %s: %s\n", + inet6_ntoa ( &ndp->target ), + ll_protocol->ntoa ( ll_addr ), strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** An NDP option handler */ +struct ndp_option_handler { + /** ICMPv6 type */ + uint8_t icmp_type; + /** Option type */ + uint8_t option_type; + /** + * Handle received option + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v ndp NDP packet + * @v value Option value + * @v len Option length + * @ret rc Return status code + */ + int ( * rx ) ( struct net_device *netdev, struct sockaddr_in6 *sin6_src, + struct ndp_header *ndp, const void *value, size_t len ); +}; + +/** NDP option handlers */ +static struct ndp_option_handler ndp_option_handlers[] = { + { + .icmp_type = ICMPV6_NDP_NEIGHBOUR_SOLICITATION, + .option_type = NDP_OPT_LL_SOURCE, + .rx = ndp_rx_neighbour_solicitation, + }, + { + .icmp_type = ICMPV6_NDP_NEIGHBOUR_ADVERTISEMENT, + .option_type = NDP_OPT_LL_TARGET, + .rx = ndp_rx_neighbour_advertisement, + }, +}; + +/** + * Process received NDP option + * + * @v netdev Network device + * @v sin6_src Source socket address + * @v ndp NDP packet + * @v type Option type + * @v value Option value + * @v len Option length + * @ret rc Return status code + */ +static int ndp_rx_option ( struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + struct ndp_header *ndp, unsigned int type, + const void *value, size_t len ) { + struct ndp_option_handler *handler; + 
unsigned int i; + + /* Locate a suitable option handler, if any */ + for ( i = 0 ; i < ( sizeof ( ndp_option_handlers ) / + sizeof ( ndp_option_handlers[0] ) ) ; i++ ) { + handler = &ndp_option_handlers[i]; + if ( ( handler->icmp_type == ndp->icmp.type ) && + ( handler->option_type == type ) ) { + return handler->rx ( netdev, sin6_src, ndp, + value, len ); + } + } + + /* Silently ignore unknown options as per RFC 4861 */ + return 0; +} + +/** + * Process received NDP packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v sin6_src Source socket address + * @v sin6_dest Destination socket address + * @ret rc Return status code + */ +static int ndp_rx ( struct io_buffer *iobuf, + struct net_device *netdev, + struct sockaddr_in6 *sin6_src, + struct sockaddr_in6 *sin6_dest __unused ) { + struct ndp_header *ndp = iobuf->data; + struct ndp_option *option; + size_t remaining; + size_t option_len; + size_t option_value_len; + int rc; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *ndp ) ) { + DBGC ( netdev, "NDP packet too short at %zd bytes (min %zd " + "bytes)\n", iob_len ( iobuf ), sizeof ( *ndp ) ); + rc = -EINVAL; + goto done; + } + + /* Search for option */ + option = ndp->option; + remaining = ( iob_len ( iobuf ) - offsetof ( typeof ( *ndp ), option )); + while ( remaining ) { + + /* Sanity check */ + if ( ( remaining < sizeof ( *option ) ) || + ( option->blocks == 0 ) || + ( remaining < ( option->blocks * NDP_OPTION_BLKSZ ) ) ) { + DBGC ( netdev, "NDP bad option length:\n" ); + DBGC_HDA ( netdev, 0, option, remaining ); + rc = -EINVAL; + goto done; + } + option_len = ( option->blocks * NDP_OPTION_BLKSZ ); + option_value_len = ( option_len - sizeof ( *option ) ); + + /* Handle option */ + if ( ( rc = ndp_rx_option ( netdev, sin6_src, ndp, + option->type, option->value, + option_value_len ) ) != 0 ) { + goto done; + } + + /* Move to next option */ + option = ( ( ( void * ) option ) + option_len ); + remaining -= option_len; + } + + done: + 
free_iob ( iobuf ); + return rc; +} + +/** NDP ICMPv6 handlers */ +struct icmpv6_handler ndp_handlers[] __icmpv6_handler = { + { + .type = ICMPV6_NDP_NEIGHBOUR_SOLICITATION, + .rx = ndp_rx, + }, + { + .type = ICMPV6_NDP_NEIGHBOUR_ADVERTISEMENT, + .rx = ndp_rx, + }, +}; diff --git a/src/tests/ipv6_test.c b/src/tests/ipv6_test.c new file mode 100644 index 00000000..10e964d9 --- /dev/null +++ b/src/tests/ipv6_test.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2013 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** @file + * + * IPv6 tests + * + */ + +/* Forcibly enable assertions */ +#undef NDEBUG + +#include +#include +#include +#include +#include + +/** Define inline IPv6 address */ +#define IPV6(...) 
{ __VA_ARGS__ } + +/** + * Report an inet6_ntoa() test result + * + * @v addr IPv6 address + * @v text Expected textual representation + */ +#define inet6_ntoa_ok( addr, text ) do { \ + static const struct in6_addr in = { \ + .s6_addr = addr, \ + }; \ + static const char expected[] = text; \ + char *actual; \ + \ + actual = inet6_ntoa ( &in ); \ + DBG ( "inet6_ntoa ( %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ) " \ + "= %s\n", ntohs ( in.s6_addr16[0] ), \ + ntohs ( in.s6_addr16[1] ), ntohs ( in.s6_addr16[2] ), \ + ntohs ( in.s6_addr16[3] ), ntohs ( in.s6_addr16[4] ), \ + ntohs ( in.s6_addr16[5] ), ntohs ( in.s6_addr16[6] ), \ + ntohs ( in.s6_addr16[7] ), actual ); \ + ok ( strcmp ( actual, expected ) == 0 ); \ + } while ( 0 ) + +/** + * Perform IPv6 self-tests + * + */ +static void ipv6_test_exec ( void ) { + + /* inet6_ntoa() tests */ + inet6_ntoa_ok ( IPV6 ( 0x20, 0x01, 0x0b, 0xa8, 0x00, 0x00, 0x01, 0xd4, + 0x00, 0x00, 0x00, 0x00, 0x69, 0x50, 0x58, 0x45 ), + "2001:ba8:0:1d4::6950:5845" ); + /* No zeros */ + inet6_ntoa_ok ( IPV6 ( 0x20, 0x01, 0x0d, 0xb8, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01 ), + "2001:db8:1:1:1:1:1:1" ); + /* Run of zeros */ + inet6_ntoa_ok ( IPV6 ( 0x20, 0x01, 0x0d, 0xb8, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 ), + "2001:db8::1" ); + /* No "::" for single zero */ + inet6_ntoa_ok ( IPV6 ( 0x20, 0x01, 0x0d, 0xb8, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01 ), + "2001:db8:0:1:1:1:1:1" ); + /* Use "::" for longest run of zeros */ + inet6_ntoa_ok ( IPV6 ( 0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 ), + "2001:0:0:1::1" ); + /* Use "::" for leftmost equal-length run of zeros */ + inet6_ntoa_ok ( IPV6 ( 0x20, 0x01, 0x0d, 0xb8, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 ), + "2001:db8::1:0:0:1" ); + /* Trailing run of zeros */ + inet6_ntoa_ok ( IPV6 ( 0xfe, 0x80, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ), + "fe80::" ); + /* Leading run of zeros */ + inet6_ntoa_ok ( IPV6 ( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 ), + "::1" ); + /* All zeros */ + inet6_ntoa_ok ( IPV6 ( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ), + "::" ); + /* Maximum length */ + inet6_ntoa_ok ( IPV6 ( 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff ), + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff" ); +} + +/** IPv6 self-test */ +struct self_test ipv6_test __self_test = { + .name = "ipv6", + .exec = ipv6_test_exec, +}; diff --git a/src/tests/tests.c b/src/tests/tests.c index f965e6e3..17e22a3a 100644 --- a/src/tests/tests.c +++ b/src/tests/tests.c @@ -36,6 +36,7 @@ REQUIRE_OBJECT ( base16_test ); REQUIRE_OBJECT ( settings_test ); REQUIRE_OBJECT ( time_test ); REQUIRE_OBJECT ( tcpip_test ); +REQUIRE_OBJECT ( ipv6_test ); REQUIRE_OBJECT ( crc32_test ); REQUIRE_OBJECT ( md5_test ); REQUIRE_OBJECT ( sha1_test ); diff --git a/src/usr/route_ipv6.c b/src/usr/route_ipv6.c new file mode 100644 index 00000000..8a6fbde3 --- /dev/null +++ b/src/usr/route_ipv6.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include +#include +#include +#include + +/** @file + * + * IPv6 routing management + * + */ + +/** + * Print IPv6 routing table + * + * @v netdev Network device + */ +static void route_ipv6_print ( struct net_device *netdev ) { + struct ipv6_miniroute *miniroute; + + list_for_each_entry ( miniroute, &ipv6_miniroutes, list ) { + if ( miniroute->netdev != netdev ) + continue; + printf ( "%s: %s/%d", netdev->name, + inet6_ntoa ( &miniroute->address ), + miniroute->prefix_len ); + if ( miniroute->has_router ) + printf ( " gw %s", inet6_ntoa ( &miniroute->router ) ); + if ( ! netdev_is_open ( miniroute->netdev ) ) + printf ( " (inaccessible)" ); + printf ( "\n" ); + } +} + +/** IPv6 routing family */ +struct routing_family ipv6_routing_family __routing_family ( ROUTING_IPV6 ) = { + .print = route_ipv6_print, +}; From 8aaa48beb80e792ea0552c77520921b0e51819af Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 3 Sep 2013 20:01:17 +0100 Subject: [PATCH 13/18] [ipv6] Fix uninitialised-variable warning Fix uninitialised-variable warning reported by gcc 4.5.2. Signed-off-by: Michael Brown --- src/net/ndp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/net/ndp.c b/src/net/ndp.c index 48b16d02..ee3a5b8b 100644 --- a/src/net/ndp.c +++ b/src/net/ndp.c @@ -352,6 +352,9 @@ static int ndp_rx ( struct io_buffer *iobuf, remaining -= option_len; } + /* Success */ + rc = 0; + done: free_iob ( iobuf ); return rc; From 8dd180f165eb8cd0a8475c0fd19bd6dbf17f95d8 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Wed, 4 Sep 2013 17:37:34 +0100 Subject: [PATCH 14/18] [tcp] Reduce path MTU to 1280 bytes The path MTU is currently hardcoded to 1460 bytes, which fails to allow space for TCP options. 
Sending a maximum-sized datagram (which is viable when using HTTP POST) will therefore fail since the Ethernet MTU will be exceeded. Reduce the hardcoded path MTU to produce a maximum datagram of 1280 bytes, which is the size required of data link layers by IPv6. It is a reasonable assumption that all intermediary data link layers will be able to convey this packet without fragmentation, even for IPv4. Note that this reduction has a minimal impact upon download throughput, since it affects only the transmit data path. Originally-fixed-by: Suresh Sundriyal Signed-off-by: Michael Brown --- src/include/ipxe/tcp.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/include/ipxe/tcp.h b/src/include/ipxe/tcp.h index 6b669117..eb4b7b22 100644 --- a/src/include/ipxe/tcp.h +++ b/src/include/ipxe/tcp.h @@ -316,10 +316,19 @@ struct tcp_options { /** * Path MTU * - * We really ought to implement Path MTU discovery. Until we do, - * anything with a path MTU greater than this may fail. + * IPv6 requires all data link layers to support a datagram size of + * 1280 bytes. We choose to use this as our maximum transmitted + * datagram size, on the assumption that any practical link layer we + * encounter will allow this size. This is a very conservative + * assumption in practice, but the impact of making such a + * conservative assumption is insignificant since the amount of data + * that we transmit (rather than receive) is negligible. + * + * We allow space within this 1280 bytes for an IPv6 header, a TCP + * header, and a (padded) TCP timestamp option. 
*/ -#define TCP_PATH_MTU 1460 +#define TCP_PATH_MTU \ + ( 1280 - 40 /* IPv6 */ - 20 /* TCP */ - 12 /* TCP timestamp */ ) /** * Advertised TCP MSS From a9fa0d5f2bd5254464f63a7312857fcec83bf37f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Wed, 11 Sep 2013 04:26:39 +0100 Subject: [PATCH 15/18] [ipv6] Add inet6_aton() Signed-off-by: Michael Brown --- src/include/ipxe/in.h | 3 +- src/net/ipv6.c | 77 ++++++++++++++++++++++++++++++++++++++++--- src/tests/ipv6_test.c | 66 +++++++++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+), 5 deletions(-) diff --git a/src/include/ipxe/in.h b/src/include/ipxe/in.h index a1821b1f..a37784e2 100644 --- a/src/include/ipxe/in.h +++ b/src/include/ipxe/in.h @@ -120,6 +120,7 @@ struct sockaddr_in6 { extern int inet_aton ( const char *cp, struct in_addr *inp ); extern char * inet_ntoa ( struct in_addr in ); -extern char * inet6_ntoa ( const struct in6_addr *in6 ); +extern int inet6_aton ( const char *string, struct in6_addr *in ); +extern char * inet6_ntoa ( const struct in6_addr *in ); #endif /* _IPXE_IN_H */ diff --git a/src/net/ipv6.c b/src/net/ipv6.c index 69feba19..8dc251ba 100644 --- a/src/net/ipv6.c +++ b/src/net/ipv6.c @@ -622,11 +622,80 @@ static int ipv6_rx ( struct io_buffer *iobuf, struct net_device *netdev, return rc; } +/** + * Parse IPv6 address + * + * @v string IPv6 address string + * @ret in IPv6 address to fill in + * @ret rc Return status code + */ +int inet6_aton ( const char *string, struct in6_addr *in ) { + uint16_t *word = in->s6_addr16; + uint16_t *end = ( word + ( sizeof ( in->s6_addr16 ) / + sizeof ( in->s6_addr16[0] ) ) ); + uint16_t *pad = NULL; + const char *nptr = string; + char *endptr; + unsigned long value; + size_t pad_len; + size_t move_len; + + /* Parse string */ + while ( 1 ) { + + /* Parse current word */ + value = strtoul ( nptr, &endptr, 16 ); + if ( value > 0xffff ) { + DBG ( "IPv6 invalid word value %#lx in \"%s\"\n", + value, string ); + return -EINVAL; + } + *(word++) = htons ( 
value ); + + /* Parse separator */ + if ( ! *endptr ) + break; + if ( *endptr != ':' ) { + DBG ( "IPv6 invalid separator '%c' in \"%s\"\n", + *endptr, string ); + return -EINVAL; + } + if ( ( endptr == nptr ) && ( nptr != string ) ) { + if ( pad ) { + DBG ( "IPv6 invalid multiple \"::\" in " + "\"%s\"\n", string ); + return -EINVAL; + } + pad = word; + } + nptr = ( endptr + 1 ); + + /* Check for overrun */ + if ( word == end ) { + DBG ( "IPv6 too many words in \"%s\"\n", string ); + return -EINVAL; + } + } + + /* Insert padding if specified */ + if ( pad ) { + move_len = ( ( ( void * ) word ) - ( ( void * ) pad ) ); + pad_len = ( ( ( void * ) end ) - ( ( void * ) word ) ); + memmove ( ( ( ( void * ) pad ) + pad_len ), pad, move_len ); + memset ( pad, 0, pad_len ); + } else if ( word != end ) { + DBG ( "IPv6 underlength address \"%s\"\n", string ); + return -EINVAL; + } + + return 0; +} + /** * Convert IPv6 address to standard notation * - * @v in IPv6 address - * @ret string IPv6 address in standard notation + * @v in IPv6 address + * @ret string IPv6 address string in canonical format * * RFC5952 defines the canonical format for IPv6 textual representation. 
*/ @@ -672,8 +741,8 @@ char * inet6_ntoa ( const struct in6_addr *in ) { /** * Transcribe IPv6 address * - * @v net_addr IPv6 address - * @ret string IPv6 address in standard notation + * @v net_addr IPv6 address + * @ret string IPv6 address in standard notation * */ static const char * ipv6_ntoa ( const void *net_addr ) { diff --git a/src/tests/ipv6_test.c b/src/tests/ipv6_test.c index 10e964d9..4de310ab 100644 --- a/src/tests/ipv6_test.c +++ b/src/tests/ipv6_test.c @@ -60,6 +60,37 @@ FILE_LICENCE ( GPL2_OR_LATER ); ok ( strcmp ( actual, expected ) == 0 ); \ } while ( 0 ) +/** + * Report an inet6_aton() test result + * + * @v text Textual representation + * @v addr Expected IPv6 address + */ +#define inet6_aton_ok( text, addr ) do { \ + static const char string[] = text; \ + static const struct in6_addr expected = { \ + .s6_addr = addr, \ + }; \ + struct in6_addr actual; \ + \ + ok ( inet6_aton ( string, &actual ) == 0 ); \ + DBG ( "inet6_aton ( \"%s\" ) = %s\n", string, \ + inet6_ntoa ( &actual ) ); \ + ok ( memcmp ( &actual, &expected, sizeof ( actual ) ) == 0 ); \ + } while ( 0 ) + +/** + * Report an inet6_aton() failure test result + * + * @v text Textual representation + */ +#define inet6_aton_fail_ok( text ) do { \ + static const char string[] = text; \ + struct in6_addr dummy; \ + \ + ok ( inet6_aton ( string, &dummy ) != 0 ); \ + } while ( 0 ) + /** * Perform IPv6 self-tests * @@ -106,6 +137,41 @@ static void ipv6_test_exec ( void ) { inet6_ntoa_ok ( IPV6 ( 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff ), "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff" ); + + /* inet6_aton() tests */ + inet6_aton_ok ( "2001:ba8:0:1d4::6950:5845", + IPV6 ( 0x20, 0x01, 0x0b, 0xa8, 0x00, 0x00, 0x01, 0xd4, + 0x00, 0x00, 0x00, 0x00, 0x69, 0x50, 0x58, 0x45)); + /* No zeros */ + inet6_aton_ok ( "2001:db8:1:1:1:1:1:1", + IPV6 ( 0x20, 0x01, 0x0d, 0xb8, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01)); + /* All 
intervening zeros */ + inet6_aton_ok ( "fe80::1", + IPV6 ( 0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01)); + /* Trailing run of zeros */ + inet6_aton_ok ( "fe80::", + IPV6 ( 0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00)); + /* Leading run of zeros */ + inet6_aton_ok ( "::1", + IPV6 ( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01)); + /* All zeros */ + inet6_aton_ok ( "::", + IPV6 ( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00)); + + /* inet6_aton() failure tests */ + inet6_aton_fail_ok ( "20012:ba8:0:1d4::6950:5845" ); + inet6_aton_fail_ok ( "200z:ba8:0:1d4::6950:5845" ); + inet6_aton_fail_ok ( "2001.ba8:0:1d4::6950:5845" ); + inet6_aton_fail_ok ( "2001:db8:1:1:1:1:1" ); + inet6_aton_fail_ok ( "2001:db8:1:1:1:1:1:1:2" ); + inet6_aton_fail_ok ( "2001:db8::1::2" ); + inet6_aton_fail_ok ( "2001:ba8:0:1d4:::6950:5845" ); + inet6_aton_fail_ok ( ":::" ); } /** IPv6 self-test */ From cba22d36b77da53890bd65fdadd0e63925687af0 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Wed, 25 Sep 2013 12:55:46 +0100 Subject: [PATCH 16/18] [build] Work around bug in gcc >= 4.8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 238050d ("[build] Work around bug in gcc >= 4.8") works around one instance of a bug in recent versions of gcc, in which "ebp" cannot be specified within an asm clobber list. Some versions of gcc seem to exhibit the same bug on other points in the codebase. Fix by changing all instances of "ebp" in a clobber list to use the push/pop %ebp workaround instead. 
Originally-implemented-by: Víctor Román Archidona Signed-off-by: Michael Brown --- src/arch/i386/drivers/net/undiload.c | 8 +++++--- src/arch/i386/firmware/pcbios/bios_console.c | 9 +++++---- src/arch/i386/image/bootsector.c | 7 ++++++- src/arch/i386/image/elfboot.c | 7 ++++--- src/arch/i386/image/nbi.c | 16 ++++++++++------ src/arch/i386/interface/pxeparent/pxeparent.c | 8 +++++--- 6 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/arch/i386/drivers/net/undiload.c b/src/arch/i386/drivers/net/undiload.c index f0f15e6a..77134dcb 100644 --- a/src/arch/i386/drivers/net/undiload.c +++ b/src/arch/i386/drivers/net/undiload.c @@ -103,13 +103,15 @@ int undi_load ( struct undi_device *undi, struct undi_rom *undirom ) { /* Call loader */ undi_loader_entry = undirom->loader_entry; - __asm__ __volatile__ ( REAL_CODE ( "pushw %%ds\n\t" + __asm__ __volatile__ ( REAL_CODE ( "pushl %%ebp\n\t" /* gcc bug */ + "pushw %%ds\n\t" "pushw %%ax\n\t" "lcall *undi_loader_entry\n\t" - "addw $4, %%sp\n\t" ) + "popl %%ebp\n\t" /* discard */ + "popl %%ebp\n\t" /* gcc bug */ ) : "=a" ( exit ) : "a" ( __from_data16 ( &undi_loader ) ) - : "ebx", "ecx", "edx", "esi", "edi", "ebp" ); + : "ebx", "ecx", "edx", "esi", "edi" ); if ( exit != PXENV_EXIT_SUCCESS ) { /* Clear entry point */ diff --git a/src/arch/i386/firmware/pcbios/bios_console.c b/src/arch/i386/firmware/pcbios/bios_console.c index 213ebd92..79e43708 100644 --- a/src/arch/i386/firmware/pcbios/bios_console.c +++ b/src/arch/i386/firmware/pcbios/bios_console.c @@ -167,7 +167,8 @@ static void bios_putchar ( int character ) { return; /* Print character with attribute */ - __asm__ __volatile__ ( REAL_CODE ( "sti\n\t" + __asm__ __volatile__ ( REAL_CODE ( "pushl %%ebp\n\t" /* gcc bug */ + "sti\n\t" /* Skip non-printable characters */ "cmpb $0x20, %%al\n\t" "jb 1f\n\t" @@ -188,11 +189,11 @@ static void bios_putchar ( int character ) { "xorw %%bx, %%bx\n\t" "movb $0x0e, %%ah\n\t" "int $0x10\n\t" - "cli\n\t" ) + "cli\n\t" + "popl 
%%ebp\n\t" /* gcc bug */ ) : "=a" ( discard_a ), "=b" ( discard_b ), "=c" ( discard_c ) - : "a" ( character ), "b" ( bios_attr ) - : "ebp" ); + : "a" ( character ), "b" ( bios_attr ) ); } /** diff --git a/src/arch/i386/image/bootsector.c b/src/arch/i386/image/bootsector.c index ab3cf94c..cb164fda 100644 --- a/src/arch/i386/image/bootsector.c +++ b/src/arch/i386/image/bootsector.c @@ -80,6 +80,8 @@ int call_bootsector ( unsigned int segment, unsigned int offset, "movw %%ss, %%ax\n\t" "movw %%ax, %%cs:saved_ss\n\t" "movw %%sp, %%cs:saved_sp\n\t" + /* Save frame pointer (gcc bug) */ + "movl %%ebp, %%cs:saved_ebp\n\t" /* Prepare jump to boot sector */ "pushw %%bx\n\t" "pushw %%di\n\t" @@ -99,11 +101,14 @@ int call_bootsector ( unsigned int segment, unsigned int offset, "sti\n\t" "lret\n\t" /* Preserved variables */ + "\nsaved_ebp: .long 0\n\t" "\nsaved_ss: .word 0\n\t" "\nsaved_sp: .word 0\n\t" "\nsaved_retaddr: .word 0\n\t" /* Boot failure return point */ "\nbootsector_exec_fail:\n\t" + /* Restore frame pointer (gcc bug) */ + "movl %%cs:saved_ebp, %%ebp\n\t" /* Restore stack pointer */ "movw %%cs:saved_ss, %%ax\n\t" "movw %%ax, %%ss\n\t" @@ -114,7 +119,7 @@ int call_bootsector ( unsigned int segment, unsigned int offset, "=d" ( discard_d ) : "b" ( segment ), "D" ( offset ), "d" ( drive ) - : "eax", "ecx", "esi", "ebp" ); + : "eax", "ecx", "esi" ); DBG ( "Booted disk returned via INT 18 or 19\n" ); diff --git a/src/arch/i386/image/elfboot.c b/src/arch/i386/image/elfboot.c index a867a956..0f6957f0 100644 --- a/src/arch/i386/image/elfboot.c +++ b/src/arch/i386/image/elfboot.c @@ -60,10 +60,11 @@ static int elfboot_exec ( struct image *image ) { /* Jump to OS with flat physical addressing */ DBGC ( image, "ELF %p starting execution at %lx\n", image, entry ); - __asm__ __volatile__ ( PHYS_CODE ( "call *%%edi\n\t" ) + __asm__ __volatile__ ( PHYS_CODE ( "pushl %%ebp\n\t" /* gcc bug */ + "call *%%edi\n\t" + "popl %%ebp\n\t" /* gcc bug */ ) : : "D" ( entry ) - : "eax", "ebx", 
"ecx", "edx", "esi", "ebp", - "memory" ); + : "eax", "ebx", "ecx", "edx", "esi", "memory" ); DBGC ( image, "ELF %p returned\n", image ); diff --git a/src/arch/i386/image/nbi.c b/src/arch/i386/image/nbi.c index d3e523e9..99046144 100644 --- a/src/arch/i386/image/nbi.c +++ b/src/arch/i386/image/nbi.c @@ -248,7 +248,8 @@ static int nbi_boot16 ( struct image *image, struct imgheader *imgheader ) { imgheader->execaddr.segoff.offset ); __asm__ __volatile__ ( - REAL_CODE ( "pushw %%ds\n\t" /* far pointer to bootp data */ + REAL_CODE ( "pushl %%ebp\n\t" /* gcc bug */ + "pushw %%ds\n\t" /* far pointer to bootp data */ "pushw %%bx\n\t" "pushl %%esi\n\t" /* location */ "pushw %%cs\n\t" /* lcall execaddr */ @@ -258,13 +259,14 @@ static int nbi_boot16 ( struct image *image, struct imgheader *imgheader ) { "pushl %%edi\n\t" "lret\n\t" "\n2:\n\t" - "addw $8,%%sp\n\t" /* clean up stack */ ) + "addw $8,%%sp\n\t" /* clean up stack */ + "popl %%ebp\n\t" /* gcc bug */ ) : "=a" ( rc ), "=D" ( discard_D ), "=S" ( discard_S ), "=b" ( discard_b ) : "D" ( imgheader->execaddr.segoff ), "S" ( imgheader->location ), "b" ( __from_data16 ( basemem_packet ) ) - : "ecx", "edx", "ebp" ); + : "ecx", "edx" ); return rc; } @@ -288,11 +290,13 @@ static int nbi_boot32 ( struct image *image, struct imgheader *imgheader ) { /* Jump to OS with flat physical addressing */ __asm__ __volatile__ ( - PHYS_CODE ( "pushl %%ebx\n\t" /* bootp data */ + PHYS_CODE ( "pushl %%ebp\n\t" /* gcc bug */ + "pushl %%ebx\n\t" /* bootp data */ "pushl %%esi\n\t" /* imgheader */ "pushl %%eax\n\t" /* loaderinfo */ "call *%%edi\n\t" - "addl $12, %%esp\n\t" /* clean up stack */ ) + "addl $12, %%esp\n\t" /* clean up stack */ + "popl %%ebp\n\t" /* gcc bug */ ) : "=a" ( rc ), "=D" ( discard_D ), "=S" ( discard_S ), "=b" ( discard_b ) : "D" ( imgheader->execaddr.linear ), @@ -300,7 +304,7 @@ static int nbi_boot32 ( struct image *image, struct imgheader *imgheader ) { imgheader->location.offset ), "b" ( virt_to_phys ( basemem_packet ) 
), "a" ( virt_to_phys ( &loaderinfo ) ) - : "ecx", "edx", "ebp", "memory" ); + : "ecx", "edx", "memory" ); return rc; } diff --git a/src/arch/i386/interface/pxeparent/pxeparent.c b/src/arch/i386/interface/pxeparent/pxeparent.c index b2c6ffba..9d2948c5 100644 --- a/src/arch/i386/interface/pxeparent/pxeparent.c +++ b/src/arch/i386/interface/pxeparent/pxeparent.c @@ -143,16 +143,18 @@ int pxeparent_call ( SEGOFF16_t entry, unsigned int function, /* Call real-mode entry point. This calling convention will * work with both the !PXE and the PXENV+ entry points. */ - __asm__ __volatile__ ( REAL_CODE ( "pushw %%es\n\t" + __asm__ __volatile__ ( REAL_CODE ( "pushl %%ebp\n\t" /* gcc bug */ + "pushw %%es\n\t" "pushw %%di\n\t" "pushw %%bx\n\t" "lcall *pxeparent_entry_point\n\t" - "addw $6, %%sp\n\t" ) + "addw $6, %%sp\n\t" + "popl %%ebp\n\t" /* gcc bug */ ) : "=a" ( exit ), "=b" ( discard_b ), "=D" ( discard_D ) : "b" ( function ), "D" ( __from_data16 ( &pxeparent_params ) ) - : "ecx", "edx", "esi", "ebp" ); + : "ecx", "edx", "esi" ); /* Determine return status code based on PXENV_EXIT and * PXENV_STATUS From 37ccbd301df299880dcaeae6e48362e998f66c6a Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Wed, 25 Sep 2013 14:34:00 +0100 Subject: [PATCH 17/18] [neighbour] Add nstat() function to print out neighbour table Signed-off-by: Michael Brown --- src/include/ipxe/neighbour.h | 44 ++++++++++++++++++++++++++++ src/include/usr/neighmgmt.h | 14 +++++++++ src/net/neighbour.c | 42 +-------------------------- src/usr/neighmgmt.c | 56 ++++++++++++++++++++++++++++++++++++ 4 files changed, 115 insertions(+), 41 deletions(-) create mode 100644 src/include/usr/neighmgmt.h create mode 100644 src/usr/neighmgmt.c diff --git a/src/include/ipxe/neighbour.h b/src/include/ipxe/neighbour.h index 5720e8b0..f2a3946f 100644 --- a/src/include/ipxe/neighbour.h +++ b/src/include/ipxe/neighbour.h @@ -9,7 +9,11 @@ FILE_LICENCE ( GPL2_OR_LATER ); +#include +#include +#include #include +#include /** A 
neighbour discovery protocol */ struct neighbour_discovery { @@ -29,6 +33,46 @@ struct neighbour_discovery { const void *net_dest, const void *net_source ); }; +/** A neighbour cache entry */ +struct neighbour { + /** Reference count */ + struct refcnt refcnt; + /** List of neighbour cache entries */ + struct list_head list; + + /** Network device */ + struct net_device *netdev; + /** Network-layer protocol */ + struct net_protocol *net_protocol; + /** Network-layer destination address */ + uint8_t net_dest[MAX_NET_ADDR_LEN]; + /** Link-layer destination address */ + uint8_t ll_dest[MAX_LL_ADDR_LEN]; + + /** Neighbour discovery protocol (if any) */ + struct neighbour_discovery *discovery; + /** Network-layer source address (if any) */ + uint8_t net_source[MAX_NET_ADDR_LEN]; + /** Retransmission timer */ + struct retry_timer timer; + + /** Pending I/O buffers */ + struct list_head tx_queue; +}; + +/** + * Test if neighbour cache entry has a valid link-layer address + * + * @v neighbour Neighbour cache entry + * @ret has_ll_dest Neighbour cache entry has a valid link-layer address + */ +static inline __attribute__ (( always_inline )) int +neighbour_has_ll_dest ( struct neighbour *neighbour ) { + return ( ! 
timer_running ( &neighbour->timer ) ); +} + +extern struct list_head neighbours; + extern int neighbour_tx ( struct io_buffer *iobuf, struct net_device *netdev, struct net_protocol *net_protocol, const void *net_dest, diff --git a/src/include/usr/neighmgmt.h b/src/include/usr/neighmgmt.h new file mode 100644 index 00000000..3c2b704a --- /dev/null +++ b/src/include/usr/neighmgmt.h @@ -0,0 +1,14 @@ +#ifndef _USR_NEIGHMGMT_H +#define _USR_NEIGHMGMT_H + +/** @file + * + * Neighbour management + * + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +extern void nstat ( void ); + +#endif /* _USR_NEIGHMGMT_H */ diff --git a/src/net/neighbour.c b/src/net/neighbour.c index 210b7138..c175665a 100644 --- a/src/net/neighbour.c +++ b/src/net/neighbour.c @@ -23,8 +23,6 @@ FILE_LICENCE ( GPL2_OR_LATER ); #include #include #include -#include -#include #include #include #include @@ -40,33 +38,6 @@ FILE_LICENCE ( GPL2_OR_LATER ); * */ -/** A neighbour cache entry */ -struct neighbour { - /** Reference count */ - struct refcnt refcnt; - /** List of neighbour cache entries */ - struct list_head list; - - /** Network device */ - struct net_device *netdev; - /** Network-layer protocol */ - struct net_protocol *net_protocol; - /** Network-layer destination address */ - uint8_t net_dest[MAX_NET_ADDR_LEN]; - /** Link-layer destination address */ - uint8_t ll_dest[MAX_LL_ADDR_LEN]; - - /** Neighbour discovery protocol (if any) */ - struct neighbour_discovery *discovery; - /** Network-layer source address (if any) */ - uint8_t net_source[MAX_NET_ADDR_LEN]; - /** Retransmission timer */ - struct retry_timer timer; - - /** Pending I/O buffers */ - struct list_head tx_queue; -}; - /** Neighbour discovery minimum timeout */ #define NEIGHBOUR_MIN_TIMEOUT ( TICKS_PER_SEC / 8 ) @@ -74,7 +45,7 @@ struct neighbour { #define NEIGHBOUR_MAX_TIMEOUT ( TICKS_PER_SEC * 3 ) /** The neighbour cache */ -static LIST_HEAD ( neighbours ); +struct list_head neighbours = LIST_HEAD_INIT ( neighbours ); static void 
neighbour_expired ( struct retry_timer *timer, int over ); @@ -97,17 +68,6 @@ static void neighbour_free ( struct refcnt *refcnt ) { free ( neighbour ); } -/** - * Test if neighbour cache entry has a valid link-layer address - * - * @v neighbour Neighbour cache entry - * @ret has_ll_dest Neighbour cache entry has a valid link-layer address - */ -static inline __attribute__ (( always_inline )) int -neighbour_has_ll_dest ( struct neighbour *neighbour ) { - return ( ! timer_running ( &neighbour->timer ) ); -} - /** * Create neighbour cache entry * diff --git a/src/usr/neighmgmt.c b/src/usr/neighmgmt.c new file mode 100644 index 00000000..e4d21a20 --- /dev/null +++ b/src/usr/neighmgmt.c @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2013 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. 
+ */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +#include +#include +#include + +/** @file + * + * Neighbour management + * + */ + +/** + * Print neighbour table + * + */ +void nstat ( void ) { + struct neighbour *neighbour; + struct net_device *netdev; + struct ll_protocol *ll_protocol; + struct net_protocol *net_protocol; + + list_for_each_entry ( neighbour, &neighbours, list ) { + netdev = neighbour->netdev; + ll_protocol = netdev->ll_protocol; + net_protocol = neighbour->net_protocol; + printf ( "%s %s %s is %s %s", netdev->name, net_protocol->name, + net_protocol->ntoa ( neighbour->net_dest ), + ll_protocol->name, + ( neighbour_has_ll_dest ( neighbour ) ? + ll_protocol->ntoa ( neighbour->ll_dest ) : + "(incomplete)" ) ); + if ( neighbour->discovery ) + printf ( " (%s)", neighbour->discovery->name ); + printf ( "\n" ); + } +} From 7405685df2bea9a457970d8b5a63ede08fcda6f7 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Wed, 25 Sep 2013 14:35:07 +0100 Subject: [PATCH 18/18] [cmdline] Add "nstat" command Signed-off-by: Michael Brown --- src/config/config.c | 3 ++ src/config/general.h | 1 + src/hci/commands/neighbour_cmd.c | 69 ++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) create mode 100644 src/hci/commands/neighbour_cmd.c diff --git a/src/config/config.c b/src/config/config.c index 6596e951..13fa7db4 100644 --- a/src/config/config.c +++ b/src/config/config.c @@ -269,6 +269,9 @@ REQUIRE_OBJECT ( pci_cmd ); #ifdef PARAM_CMD REQUIRE_OBJECT ( param_cmd ); #endif +#ifdef NEIGHBOUR_CMD +REQUIRE_OBJECT ( neighbour_cmd ); +#endif /* * Drag in miscellaneous objects diff --git a/src/config/general.h b/src/config/general.h index 2e93efde..00a327f2 100644 --- a/src/config/general.h +++ b/src/config/general.h @@ -132,6 +132,7 @@ FILE_LICENCE ( GPL2_OR_LATER ); //#define IMAGE_TRUST_CMD /* Image trust management commands */ //#define PCI_CMD /* PCI commands */ //#define PARAM_CMD /* Form parameter commands */ +//#define NEIGHBOUR_CMD /* Neighbour management 
commands */ /* * ROM-specific options diff --git a/src/hci/commands/neighbour_cmd.c b/src/hci/commands/neighbour_cmd.c new file mode 100644 index 00000000..d65c355c --- /dev/null +++ b/src/hci/commands/neighbour_cmd.c @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2013 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +FILE_LICENCE ( GPL2_OR_LATER ); + +/** @file + * + * Neighbour management commands + * + */ + +#include +#include +#include +#include + +/** "nstat" options */ +struct nstat_options {}; + +/** "nstat" option list */ +static struct option_descriptor nstat_opts[] = {}; + +/** "nstat" command descriptor */ +static struct command_descriptor nstat_cmd = + COMMAND_DESC ( struct nstat_options, nstat_opts, 0, 0, "" ); + +/** + * The "nstat" command + * + * @v argc Argument count + * @v argv Argument list + * @ret rc Return status code + */ +static int nstat_exec ( int argc, char **argv ) { + struct nstat_options opts; + int rc; + + /* Parse options */ + if ( ( rc = parse_options ( argc, argv, &nstat_cmd, &opts ) ) != 0) + return rc; + + nstat(); + + return 0; +} + +/** Neighbour management commands */ +struct command neighbour_commands[] __command = { + { + .name = "nstat", + .exec = nstat_exec, + }, +};