[infiniband] Allow for the creation of multicast groups
[ipxe.git] / src / net / ipv6.c
1 /*
2 * Copyright (C) 2013 Michael Brown <mbrown@fensystems.co.uk>.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA.
18 */
19
20 FILE_LICENCE ( GPL2_OR_LATER );
21
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <errno.h>
27 #include <assert.h>
28 #include <byteswap.h>
29 #include <ipxe/iobuf.h>
30 #include <ipxe/tcpip.h>
31 #include <ipxe/if_ether.h>
32 #include <ipxe/crc32.h>
33 #include <ipxe/fragment.h>
34 #include <ipxe/ipstat.h>
35 #include <ipxe/ndp.h>
36 #include <ipxe/ipv6.h>
37
38 /** @file
39 *
40 * IPv6 protocol
41 *
42 */
43
/*
 * Disambiguated error codes
 *
 * Each generic errno used for more than one failure cause in this
 * file is given a unique variant with its own description.
 */
#define EINFO_EINVAL_LEN						\
	__einfo_uniqify ( EINFO_EINVAL, 0x01, "Invalid length" )
#define EINVAL_LEN __einfo_error ( EINFO_EINVAL_LEN )

#define EINFO_ENOTSUP_VER						\
	__einfo_uniqify ( EINFO_ENOTSUP, 0x01, "Unsupported version" )
#define ENOTSUP_VER __einfo_error ( EINFO_ENOTSUP_VER )

#define EINFO_ENOTSUP_HDR						\
	__einfo_uniqify ( EINFO_ENOTSUP, 0x02, "Unsupported header type" )
#define ENOTSUP_HDR __einfo_error ( EINFO_ENOTSUP_HDR )

#define EINFO_ENOTSUP_OPT						\
	__einfo_uniqify ( EINFO_ENOTSUP, 0x03, "Unsupported option" )
#define ENOTSUP_OPT __einfo_error ( EINFO_ENOTSUP_OPT )
57
58 /** List of IPv6 miniroutes */
59 struct list_head ipv6_miniroutes = LIST_HEAD_INIT ( ipv6_miniroutes );
60
61 /** IPv6 statistics */
62 static struct ip_statistics ipv6_stats;
63
64 /** IPv6 statistics family */
65 struct ip_statistics_family
66 ipv6_statistics_family __ip_statistics_family ( IP_STATISTICS_IPV6 ) = {
67 .version = 6,
68 .stats = &ipv6_stats,
69 };
70
71 /**
72 * Determine debugging colour for IPv6 debug messages
73 *
74 * @v in IPv6 address
75 * @ret col Debugging colour (for DBGC())
76 */
77 static uint32_t ipv6col ( struct in6_addr *in ) {
78 return crc32_le ( 0, in, sizeof ( *in ) );
79 }
80
81 /**
82 * Dump IPv6 routing table entry
83 *
84 * @v miniroute Routing table entry
85 */
86 static inline __attribute__ (( always_inline )) void
87 ipv6_dump_miniroute ( struct ipv6_miniroute *miniroute ) {
88 struct net_device *netdev = miniroute->netdev;
89
90 DBGC ( netdev, "IPv6 %s has %s %s/%d", netdev->name,
91 ( ( miniroute->flags & IPV6_HAS_ADDRESS ) ?
92 "address" : "prefix" ),
93 inet6_ntoa ( &miniroute->address ), miniroute->prefix_len );
94 if ( miniroute->flags & IPV6_HAS_ROUTER )
95 DBGC ( netdev, " router %s", inet6_ntoa ( &miniroute->router ));
96 DBGC ( netdev, "\n" );
97 }
98
99 /**
100 * Check if network device has a specific IPv6 address
101 *
102 * @v netdev Network device
103 * @v addr IPv6 address
104 * @ret has_addr Network device has this IPv6 address
105 */
106 int ipv6_has_addr ( struct net_device *netdev, struct in6_addr *addr ) {
107 struct ipv6_miniroute *miniroute;
108
109 list_for_each_entry ( miniroute, &ipv6_miniroutes, list ) {
110 if ( ( miniroute->netdev == netdev ) &&
111 ( miniroute->flags & IPV6_HAS_ADDRESS ) &&
112 ( memcmp ( &miniroute->address, addr,
113 sizeof ( miniroute->address ) ) == 0 ) ) {
114 /* Found matching address */
115 return 1;
116 }
117 }
118 return 0;
119 }
120
121 /**
122 * Check if IPv6 address is within a routing table entry's local network
123 *
124 * @v miniroute Routing table entry
125 * @v address IPv6 address
126 * @ret is_on_link Address is within this entry's local network
127 */
128 static int ipv6_is_on_link ( struct ipv6_miniroute *miniroute,
129 struct in6_addr *address ) {
130 unsigned int i;
131
132 for ( i = 0 ; i < ( sizeof ( address->s6_addr32 ) /
133 sizeof ( address->s6_addr32[0] ) ) ; i++ ) {
134 if ( (( address->s6_addr32[i] ^ miniroute->address.s6_addr32[i])
135 & miniroute->prefix_mask.s6_addr32[i] ) != 0 )
136 return 0;
137 }
138 return 1;
139 }
140
141 /**
142 * Find IPv6 routing table entry for a given address
143 *
144 * @v netdev Network device
145 * @v address IPv6 address
146 * @ret miniroute Routing table entry, or NULL if not found
147 */
148 static struct ipv6_miniroute * ipv6_miniroute ( struct net_device *netdev,
149 struct in6_addr *address ) {
150 struct ipv6_miniroute *miniroute;
151
152 list_for_each_entry ( miniroute, &ipv6_miniroutes, list ) {
153 if ( ( miniroute->netdev == netdev ) &&
154 ipv6_is_on_link ( miniroute, address ) ) {
155 return miniroute;
156 }
157 }
158 return NULL;
159 }
160
161 /**
162 * Add IPv6 routing table entry
163 *
164 * @v netdev Network device
165 * @v address IPv6 address (or prefix)
166 * @v prefix_len Prefix length
167 * @v flags Flags
168 * @ret miniroute Routing table entry, or NULL on failure
169 */
170 static struct ipv6_miniroute * ipv6_add_miniroute ( struct net_device *netdev,
171 struct in6_addr *address,
172 unsigned int prefix_len,
173 unsigned int flags ) {
174 struct ipv6_miniroute *miniroute;
175 uint8_t *prefix_mask;
176
177 /* Create routing table entry */
178 miniroute = zalloc ( sizeof ( *miniroute ) );
179 if ( ! miniroute )
180 return NULL;
181 miniroute->netdev = netdev_get ( netdev );
182 memcpy ( &miniroute->address, address, sizeof ( miniroute->address ) );
183 miniroute->prefix_len = prefix_len;
184 assert ( prefix_len <= ( 8 * sizeof ( miniroute->prefix_mask ) ) );
185 for ( prefix_mask = miniroute->prefix_mask.s6_addr ; prefix_len >= 8 ;
186 prefix_mask++, prefix_len -= 8 ) {
187 *prefix_mask = 0xff;
188 }
189 if ( prefix_len )
190 *prefix_mask <<= ( 8 - prefix_len );
191 miniroute->flags = flags;
192 list_add ( &miniroute->list, &ipv6_miniroutes );
193 ipv6_dump_miniroute ( miniroute );
194
195 return miniroute;
196 }
197
198 /**
199 * Define IPv6 on-link prefix
200 *
201 * @v netdev Network device
202 * @v prefix IPv6 address prefix
203 * @v prefix_len Prefix length
204 * @v router Router address (or NULL)
205 * @ret rc Return status code
206 */
207 int ipv6_set_prefix ( struct net_device *netdev, struct in6_addr *prefix,
208 unsigned int prefix_len, struct in6_addr *router ) {
209 struct ipv6_miniroute *miniroute;
210 int changed;
211
212 /* Find or create routing table entry */
213 miniroute = ipv6_miniroute ( netdev, prefix );
214 if ( ! miniroute )
215 miniroute = ipv6_add_miniroute ( netdev, prefix, prefix_len, 0);
216 if ( ! miniroute )
217 return -ENOMEM;
218
219 /* Record router and add to start or end of list as appropriate */
220 list_del ( &miniroute->list );
221 if ( router ) {
222 changed = ( ( ! ( miniroute->flags & IPV6_HAS_ROUTER ) ) ||
223 ( memcmp ( &miniroute->router, router,
224 sizeof ( miniroute->router ) ) != 0 ) );
225 miniroute->flags |= IPV6_HAS_ROUTER;
226 memcpy ( &miniroute->router, router,
227 sizeof ( miniroute->router ) );
228 list_add_tail ( &miniroute->list, &ipv6_miniroutes );
229 } else {
230 changed = ( miniroute->flags & IPV6_HAS_ROUTER );
231 miniroute->flags &= ~IPV6_HAS_ROUTER;
232 list_add ( &miniroute->list, &ipv6_miniroutes );
233 }
234 if ( changed )
235 ipv6_dump_miniroute ( miniroute );
236
237 return 0;
238 }
239
240 /**
241 * Add IPv6 on-link address
242 *
243 * @v netdev Network device
244 * @v address IPv6 address
245 * @ret rc Return status code
246 *
247 * An on-link prefix for the address must already exist.
248 */
249 int ipv6_set_address ( struct net_device *netdev, struct in6_addr *address ) {
250 struct ipv6_miniroute *miniroute;
251 int changed;
252
253 /* Find routing table entry */
254 miniroute = ipv6_miniroute ( netdev, address );
255 if ( ! miniroute )
256 return -EADDRNOTAVAIL;
257
258 /* Record address */
259 changed = ( ( ! ( miniroute->flags & IPV6_HAS_ADDRESS ) ) ||
260 ( memcmp ( &miniroute->address, address,
261 sizeof ( miniroute->address ) ) != 0 ) );
262 memcpy ( &miniroute->address, address, sizeof ( miniroute->address ) );
263 miniroute->flags |= IPV6_HAS_ADDRESS;
264 if ( changed )
265 ipv6_dump_miniroute ( miniroute );
266
267 return 0;
268 }
269
270 /**
271 * Perform IPv6 routing
272 *
273 * @v scope_id Destination address scope ID (for link-local addresses)
274 * @v dest Final destination address
275 * @ret dest Next hop destination address
276 * @ret miniroute Routing table entry to use, or NULL if no route
277 */
278 static struct ipv6_miniroute * ipv6_route ( unsigned int scope_id,
279 struct in6_addr **dest ) {
280 struct ipv6_miniroute *miniroute;
281
282 /* Find first usable route in routing table */
283 list_for_each_entry ( miniroute, &ipv6_miniroutes, list ) {
284
285 /* Skip closed network devices */
286 if ( ! netdev_is_open ( miniroute->netdev ) )
287 continue;
288
289 /* Skip routing table entries with no usable source address */
290 if ( ! ( miniroute->flags & IPV6_HAS_ADDRESS ) )
291 continue;
292
293 if ( IN6_IS_ADDR_NONGLOBAL ( *dest ) ) {
294
295 /* If destination is non-global, and the scope ID
296 * matches this network device, then use this route.
297 */
298 if ( miniroute->netdev->index == scope_id )
299 return miniroute;
300
301 } else {
302
303 /* If destination is an on-link global
304 * address, then use this route.
305 */
306 if ( ipv6_is_on_link ( miniroute, *dest ) )
307 return miniroute;
308
309 /* If destination is an off-link global
310 * address, and we have a default gateway,
311 * then use this route.
312 */
313 if ( miniroute->flags & IPV6_HAS_ROUTER ) {
314 *dest = &miniroute->router;
315 return miniroute;
316 }
317 }
318 }
319
320 return NULL;
321 }
322
323 /**
324 * Determine transmitting network device
325 *
326 * @v st_dest Destination network-layer address
327 * @ret netdev Transmitting network device, or NULL
328 */
329 static struct net_device * ipv6_netdev ( struct sockaddr_tcpip *st_dest ) {
330 struct sockaddr_in6 *sin6_dest = ( ( struct sockaddr_in6 * ) st_dest );
331 struct in6_addr *dest = &sin6_dest->sin6_addr;
332 struct ipv6_miniroute *miniroute;
333
334 /* Find routing table entry */
335 miniroute = ipv6_route ( sin6_dest->sin6_scope_id, &dest );
336 if ( ! miniroute )
337 return NULL;
338
339 return miniroute->netdev;
340 }
341
342 /**
343 * Check that received options can be safely ignored
344 *
345 * @v iphdr IPv6 header
346 * @v options Options extension header
347 * @v len Maximum length of header
348 * @ret rc Return status code
349 */
350 static int ipv6_check_options ( struct ipv6_header *iphdr,
351 struct ipv6_options_header *options,
352 size_t len ) {
353 struct ipv6_option *option = options->options;
354 struct ipv6_option *end = ( ( ( void * ) options ) + len );
355
356 while ( option < end ) {
357 if ( ! IPV6_CAN_IGNORE_OPT ( option->type ) ) {
358 DBGC ( ipv6col ( &iphdr->src ), "IPv6 unrecognised "
359 "option type %#02x:\n", option->type );
360 DBGC_HDA ( ipv6col ( &iphdr->src ), 0,
361 options, len );
362 return -ENOTSUP_OPT;
363 }
364 if ( option->type == IPV6_OPT_PAD1 ) {
365 option = ( ( ( void * ) option ) + 1 );
366 } else {
367 option = ( ( ( void * ) option->value ) + option->len );
368 }
369 }
370 return 0;
371 }
372
373 /**
374 * Check if fragment matches fragment reassembly buffer
375 *
376 * @v fragment Fragment reassembly buffer
377 * @v iobuf I/O buffer
378 * @v hdrlen Length of non-fragmentable potion of I/O buffer
379 * @ret is_fragment Fragment matches this reassembly buffer
380 */
381 static int ipv6_is_fragment ( struct fragment *fragment,
382 struct io_buffer *iobuf, size_t hdrlen ) {
383 struct ipv6_header *frag_iphdr = fragment->iobuf->data;
384 struct ipv6_fragment_header *frag_fhdr =
385 ( fragment->iobuf->data + fragment->hdrlen -
386 sizeof ( *frag_fhdr ) );
387 struct ipv6_header *iphdr = iobuf->data;
388 struct ipv6_fragment_header *fhdr =
389 ( iobuf->data + hdrlen - sizeof ( *fhdr ) );
390
391 return ( ( memcmp ( &iphdr->src, &frag_iphdr->src,
392 sizeof ( iphdr->src ) ) == 0 ) &&
393 ( fhdr->ident == frag_fhdr->ident ) );
394 }
395
396 /**
397 * Get fragment offset
398 *
399 * @v iobuf I/O buffer
400 * @v hdrlen Length of non-fragmentable potion of I/O buffer
401 * @ret offset Offset
402 */
403 static size_t ipv6_fragment_offset ( struct io_buffer *iobuf, size_t hdrlen ) {
404 struct ipv6_fragment_header *fhdr =
405 ( iobuf->data + hdrlen - sizeof ( *fhdr ) );
406
407 return ( ntohs ( fhdr->offset_more ) & IPV6_MASK_OFFSET );
408 }
409
410 /**
411 * Check if more fragments exist
412 *
413 * @v iobuf I/O buffer
414 * @v hdrlen Length of non-fragmentable potion of I/O buffer
415 * @ret more_frags More fragments exist
416 */
417 static int ipv6_more_fragments ( struct io_buffer *iobuf, size_t hdrlen ) {
418 struct ipv6_fragment_header *fhdr =
419 ( iobuf->data + hdrlen - sizeof ( *fhdr ) );
420
421 return ( fhdr->offset_more & htons ( IPV6_MASK_MOREFRAGS ) );
422 }
423
424 /** Fragment reassembler */
425 static struct fragment_reassembler ipv6_reassembler = {
426 .list = LIST_HEAD_INIT ( ipv6_reassembler.list ),
427 .is_fragment = ipv6_is_fragment,
428 .fragment_offset = ipv6_fragment_offset,
429 .more_fragments = ipv6_more_fragments,
430 .stats = &ipv6_stats,
431 };
432
433 /**
434 * Calculate IPv6 pseudo-header checksum
435 *
436 * @v iphdr IPv6 header
437 * @v len Payload length
438 * @v next_header Next header type
439 * @v csum Existing checksum
440 * @ret csum Updated checksum
441 */
442 static uint16_t ipv6_pshdr_chksum ( struct ipv6_header *iphdr, size_t len,
443 int next_header, uint16_t csum ) {
444 struct ipv6_pseudo_header pshdr;
445
446 /* Build pseudo-header */
447 memcpy ( &pshdr.src, &iphdr->src, sizeof ( pshdr.src ) );
448 memcpy ( &pshdr.dest, &iphdr->dest, sizeof ( pshdr.dest ) );
449 pshdr.len = htonl ( len );
450 memset ( pshdr.zero, 0, sizeof ( pshdr.zero ) );
451 pshdr.next_header = next_header;
452
453 /* Update the checksum value */
454 return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
455 }
456
457 /**
458 * Transmit IPv6 packet
459 *
460 * @v iobuf I/O buffer
461 * @v tcpip Transport-layer protocol
462 * @v st_src Source network-layer address
463 * @v st_dest Destination network-layer address
464 * @v netdev Network device to use if no route found, or NULL
465 * @v trans_csum Transport-layer checksum to complete, or NULL
466 * @ret rc Status
467 *
468 * This function expects a transport-layer segment and prepends the
469 * IPv6 header
470 */
471 static int ipv6_tx ( struct io_buffer *iobuf,
472 struct tcpip_protocol *tcpip_protocol,
473 struct sockaddr_tcpip *st_src,
474 struct sockaddr_tcpip *st_dest,
475 struct net_device *netdev,
476 uint16_t *trans_csum ) {
477 struct sockaddr_in6 *sin6_src = ( ( struct sockaddr_in6 * ) st_src );
478 struct sockaddr_in6 *sin6_dest = ( ( struct sockaddr_in6 * ) st_dest );
479 struct ipv6_miniroute *miniroute;
480 struct ipv6_header *iphdr;
481 struct in6_addr *src = NULL;
482 struct in6_addr *next_hop;
483 uint8_t ll_dest_buf[MAX_LL_ADDR_LEN];
484 const void *ll_dest;
485 size_t len;
486 int rc;
487
488 /* Update statistics */
489 ipv6_stats.out_requests++;
490
491 /* Fill up the IPv6 header, except source address */
492 len = iob_len ( iobuf );
493 iphdr = iob_push ( iobuf, sizeof ( *iphdr ) );
494 memset ( iphdr, 0, sizeof ( *iphdr ) );
495 iphdr->ver_tc_label = htonl ( IPV6_VER );
496 iphdr->len = htons ( len );
497 iphdr->next_header = tcpip_protocol->tcpip_proto;
498 iphdr->hop_limit = IPV6_HOP_LIMIT;
499 memcpy ( &iphdr->dest, &sin6_dest->sin6_addr, sizeof ( iphdr->dest ) );
500
501 /* Use routing table to identify next hop and transmitting netdev */
502 next_hop = &iphdr->dest;
503 if ( ( miniroute = ipv6_route ( sin6_dest->sin6_scope_id,
504 &next_hop ) ) != NULL ) {
505 src = &miniroute->address;
506 netdev = miniroute->netdev;
507 }
508 if ( ! netdev ) {
509 DBGC ( ipv6col ( &iphdr->dest ), "IPv6 has no route to %s\n",
510 inet6_ntoa ( &iphdr->dest ) );
511 ipv6_stats.out_no_routes++;
512 rc = -ENETUNREACH;
513 goto err;
514 }
515 if ( sin6_src && ! IN6_IS_ADDR_UNSPECIFIED ( &sin6_src->sin6_addr ) )
516 src = &sin6_src->sin6_addr;
517 if ( src )
518 memcpy ( &iphdr->src, src, sizeof ( iphdr->src ) );
519
520 /* Fix up checksums */
521 if ( trans_csum ) {
522 *trans_csum = ipv6_pshdr_chksum ( iphdr, len,
523 tcpip_protocol->tcpip_proto,
524 *trans_csum );
525 if ( ! *trans_csum )
526 *trans_csum = tcpip_protocol->zero_csum;
527 }
528
529 /* Print IPv6 header for debugging */
530 DBGC2 ( ipv6col ( &iphdr->dest ), "IPv6 TX %s->",
531 inet6_ntoa ( &iphdr->src ) );
532 DBGC2 ( ipv6col ( &iphdr->dest ), "%s len %zd next %d\n",
533 inet6_ntoa ( &iphdr->dest ), len, iphdr->next_header );
534
535 /* Calculate link-layer destination address, if possible */
536 if ( IN6_IS_ADDR_MULTICAST ( next_hop ) ) {
537 /* Multicast address */
538 ipv6_stats.out_mcast_pkts++;
539 if ( ( rc = netdev->ll_protocol->mc_hash ( AF_INET6, next_hop,
540 ll_dest_buf ) ) !=0){
541 DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not hash "
542 "multicast %s: %s\n", inet6_ntoa ( next_hop ),
543 strerror ( rc ) );
544 goto err;
545 }
546 ll_dest = ll_dest_buf;
547 } else {
548 /* Unicast address */
549 ll_dest = NULL;
550 }
551
552 /* Update statistics */
553 ipv6_stats.out_transmits++;
554 ipv6_stats.out_octets += iob_len ( iobuf );
555
556 /* Hand off to link layer (via NDP if applicable) */
557 if ( ll_dest ) {
558 if ( ( rc = net_tx ( iobuf, netdev, &ipv6_protocol, ll_dest,
559 netdev->ll_addr ) ) != 0 ) {
560 DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not "
561 "transmit packet via %s: %s\n",
562 netdev->name, strerror ( rc ) );
563 return rc;
564 }
565 } else {
566 if ( ( rc = ndp_tx ( iobuf, netdev, next_hop, &iphdr->src,
567 netdev->ll_addr ) ) != 0 ) {
568 DBGC ( ipv6col ( &iphdr->dest ), "IPv6 could not "
569 "transmit packet via %s: %s\n",
570 netdev->name, strerror ( rc ) );
571 return rc;
572 }
573 }
574
575 return 0;
576
577 err:
578 free_iob ( iobuf );
579 return rc;
580 }
581
582 /**
583 * Process incoming IPv6 packets
584 *
585 * @v iobuf I/O buffer
586 * @v netdev Network device
587 * @v ll_dest Link-layer destination address
588 * @v ll_source Link-layer destination source
589 * @v flags Packet flags
590 * @ret rc Return status code
591 *
592 * This function expects an IPv6 network datagram. It processes the
593 * headers and sends it to the transport layer.
594 */
595 static int ipv6_rx ( struct io_buffer *iobuf, struct net_device *netdev,
596 const void *ll_dest __unused,
597 const void *ll_source __unused,
598 unsigned int flags __unused ) {
599 struct ipv6_header *iphdr = iobuf->data;
600 union ipv6_extension_header *ext;
601 union {
602 struct sockaddr_in6 sin6;
603 struct sockaddr_tcpip st;
604 } src, dest;
605 uint16_t pshdr_csum;
606 size_t len;
607 size_t hdrlen;
608 size_t extlen;
609 int this_header;
610 int next_header;
611 int rc;
612
613 /* Update statistics */
614 ipv6_stats.in_receives++;
615 ipv6_stats.in_octets += iob_len ( iobuf );
616 if ( flags & LL_BROADCAST ) {
617 ipv6_stats.in_bcast_pkts++;
618 } else if ( flags & LL_MULTICAST ) {
619 ipv6_stats.in_mcast_pkts++;
620 }
621
622 /* Sanity check the IPv6 header */
623 if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
624 DBGC ( ipv6col ( &iphdr->src ), "IPv6 packet too short at %zd "
625 "bytes (min %zd bytes)\n", iob_len ( iobuf ),
626 sizeof ( *iphdr ) );
627 rc = -EINVAL_LEN;
628 goto err_header;
629 }
630 if ( ( iphdr->ver_tc_label & htonl ( IPV6_MASK_VER ) ) !=
631 htonl ( IPV6_VER ) ) {
632 DBGC ( ipv6col ( &iphdr->src ), "IPv6 version %#08x not "
633 "supported\n", ntohl ( iphdr->ver_tc_label ) );
634 rc = -ENOTSUP_VER;
635 goto err_header;
636 }
637
638 /* Truncate packet to specified length */
639 len = ntohs ( iphdr->len );
640 if ( len > iob_len ( iobuf ) ) {
641 DBGC ( ipv6col ( &iphdr->src ), "IPv6 length too long at %zd "
642 "bytes (packet is %zd bytes)\n", len, iob_len ( iobuf ));
643 ipv6_stats.in_truncated_pkts++;
644 rc = -EINVAL_LEN;
645 goto err_other;
646 }
647 iob_unput ( iobuf, ( iob_len ( iobuf ) - len - sizeof ( *iphdr ) ) );
648 hdrlen = sizeof ( *iphdr );
649
650 /* Print IPv6 header for debugging */
651 DBGC2 ( ipv6col ( &iphdr->src ), "IPv6 RX %s<-",
652 inet6_ntoa ( &iphdr->dest ) );
653 DBGC2 ( ipv6col ( &iphdr->src ), "%s len %zd next %d\n",
654 inet6_ntoa ( &iphdr->src ), len, iphdr->next_header );
655
656 /* Discard unicast packets not destined for us */
657 if ( ( ! ( flags & LL_MULTICAST ) ) &&
658 ( ! ipv6_has_addr ( netdev, &iphdr->dest ) ) ) {
659 DBGC ( ipv6col ( &iphdr->src ), "IPv6 discarding non-local "
660 "unicast packet for %s\n", inet6_ntoa ( &iphdr->dest ) );
661 ipv6_stats.in_addr_errors++;
662 rc = -EPIPE;
663 goto err_other;
664 }
665
666 /* Process any extension headers */
667 next_header = iphdr->next_header;
668 while ( 1 ) {
669
670 /* Extract extension header */
671 this_header = next_header;
672 ext = ( iobuf->data + hdrlen );
673 extlen = sizeof ( ext->pad );
674 if ( iob_len ( iobuf ) < ( hdrlen + extlen ) ) {
675 DBGC ( ipv6col ( &iphdr->src ), "IPv6 too short for "
676 "extension header type %d at %zd bytes (min "
677 "%zd bytes)\n", this_header,
678 ( iob_len ( iobuf ) - hdrlen ), extlen );
679 rc = -EINVAL_LEN;
680 goto err_header;
681 }
682
683 /* Determine size of extension header (if applicable) */
684 if ( ( this_header == IPV6_HOPBYHOP ) ||
685 ( this_header == IPV6_DESTINATION ) ||
686 ( this_header == IPV6_ROUTING ) ) {
687 /* Length field is present */
688 extlen += ext->common.len;
689 } else if ( this_header == IPV6_FRAGMENT ) {
690 /* Length field is reserved and ignored (RFC2460) */
691 } else {
692 /* Not an extension header; assume rest is payload */
693 break;
694 }
695 if ( iob_len ( iobuf ) < ( hdrlen + extlen ) ) {
696 DBGC ( ipv6col ( &iphdr->src ), "IPv6 too short for "
697 "extension header type %d at %zd bytes (min "
698 "%zd bytes)\n", this_header,
699 ( iob_len ( iobuf ) - hdrlen ), extlen );
700 rc = -EINVAL_LEN;
701 goto err_header;
702 }
703 hdrlen += extlen;
704 next_header = ext->common.next_header;
705 DBGC2 ( ipv6col ( &iphdr->src ), "IPv6 RX %s<-",
706 inet6_ntoa ( &iphdr->dest ) );
707 DBGC2 ( ipv6col ( &iphdr->src ), "%s ext type %d len %zd next "
708 "%d\n", inet6_ntoa ( &iphdr->src ), this_header,
709 extlen, next_header );
710
711 /* Process this extension header */
712 if ( ( this_header == IPV6_HOPBYHOP ) ||
713 ( this_header == IPV6_DESTINATION ) ) {
714
715 /* Check that all options can be ignored */
716 if ( ( rc = ipv6_check_options ( iphdr, &ext->options,
717 extlen ) ) != 0 )
718 goto err_header;
719
720 } else if ( this_header == IPV6_FRAGMENT ) {
721
722 /* Reassemble fragments */
723 iobuf = fragment_reassemble ( &ipv6_reassembler, iobuf,
724 &hdrlen );
725 if ( ! iobuf )
726 return 0;
727 iphdr = iobuf->data;
728 }
729 }
730
731 /* Construct socket address, calculate pseudo-header checksum,
732 * and hand off to transport layer
733 */
734 memset ( &src, 0, sizeof ( src ) );
735 src.sin6.sin6_family = AF_INET6;
736 memcpy ( &src.sin6.sin6_addr, &iphdr->src,
737 sizeof ( src.sin6.sin6_addr ) );
738 src.sin6.sin6_scope_id = netdev->index;
739 memset ( &dest, 0, sizeof ( dest ) );
740 dest.sin6.sin6_family = AF_INET6;
741 memcpy ( &dest.sin6.sin6_addr, &iphdr->dest,
742 sizeof ( dest.sin6.sin6_addr ) );
743 dest.sin6.sin6_scope_id = netdev->index;
744 iob_pull ( iobuf, hdrlen );
745 pshdr_csum = ipv6_pshdr_chksum ( iphdr, iob_len ( iobuf ),
746 next_header, TCPIP_EMPTY_CSUM );
747 if ( ( rc = tcpip_rx ( iobuf, netdev, next_header, &src.st, &dest.st,
748 pshdr_csum, &ipv6_stats ) ) != 0 ) {
749 DBGC ( ipv6col ( &src.sin6.sin6_addr ), "IPv6 received packet "
750 "rejected by stack: %s\n", strerror ( rc ) );
751 return rc;
752 }
753
754 return 0;
755
756 err_header:
757 ipv6_stats.in_hdr_errors++;
758 err_other:
759 free_iob ( iobuf );
760 return rc;
761 }
762
763 /**
764 * Parse IPv6 address
765 *
766 * @v string IPv6 address string
767 * @ret in IPv6 address to fill in
768 * @ret rc Return status code
769 */
770 int inet6_aton ( const char *string, struct in6_addr *in ) {
771 uint16_t *word = in->s6_addr16;
772 uint16_t *end = ( word + ( sizeof ( in->s6_addr16 ) /
773 sizeof ( in->s6_addr16[0] ) ) );
774 uint16_t *pad = NULL;
775 const char *nptr = string;
776 char *endptr;
777 unsigned long value;
778 size_t pad_len;
779 size_t move_len;
780
781 /* Parse string */
782 while ( 1 ) {
783
784 /* Parse current word */
785 value = strtoul ( nptr, &endptr, 16 );
786 if ( value > 0xffff ) {
787 DBG ( "IPv6 invalid word value %#lx in \"%s\"\n",
788 value, string );
789 return -EINVAL;
790 }
791 *(word++) = htons ( value );
792
793 /* Parse separator */
794 if ( ! *endptr )
795 break;
796 if ( *endptr != ':' ) {
797 DBG ( "IPv6 invalid separator '%c' in \"%s\"\n",
798 *endptr, string );
799 return -EINVAL;
800 }
801 if ( ( endptr == nptr ) && ( nptr != string ) ) {
802 if ( pad ) {
803 DBG ( "IPv6 invalid multiple \"::\" in "
804 "\"%s\"\n", string );
805 return -EINVAL;
806 }
807 pad = word;
808 }
809 nptr = ( endptr + 1 );
810
811 /* Check for overrun */
812 if ( word == end ) {
813 DBG ( "IPv6 too many words in \"%s\"\n", string );
814 return -EINVAL;
815 }
816 }
817
818 /* Insert padding if specified */
819 if ( pad ) {
820 move_len = ( ( ( void * ) word ) - ( ( void * ) pad ) );
821 pad_len = ( ( ( void * ) end ) - ( ( void * ) word ) );
822 memmove ( ( ( ( void * ) pad ) + pad_len ), pad, move_len );
823 memset ( pad, 0, pad_len );
824 } else if ( word != end ) {
825 DBG ( "IPv6 underlength address \"%s\"\n", string );
826 return -EINVAL;
827 }
828
829 return 0;
830 }
831
832 /**
833 * Convert IPv6 address to standard notation
834 *
835 * @v in IPv6 address
836 * @ret string IPv6 address string in canonical format
837 *
838 * RFC5952 defines the canonical format for IPv6 textual representation.
839 */
840 char * inet6_ntoa ( const struct in6_addr *in ) {
841 static char buf[41]; /* ":xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx" */
842 char *out = buf;
843 char *longest_start = NULL;
844 char *start = NULL;
845 int longest_len = 1;
846 int len = 0;
847 char *dest;
848 unsigned int i;
849 uint16_t value;
850
851 /* Format address, keeping track of longest run of zeros */
852 for ( i = 0 ; i < ( sizeof ( in->s6_addr16 ) /
853 sizeof ( in->s6_addr16[0] ) ) ; i++ ) {
854 value = ntohs ( in->s6_addr16[i] );
855 if ( value == 0 ) {
856 if ( len++ == 0 )
857 start = out;
858 if ( len > longest_len ) {
859 longest_start = start;
860 longest_len = len;
861 }
862 } else {
863 len = 0;
864 }
865 out += sprintf ( out, ":%x", value );
866 }
867
868 /* Abbreviate longest run of zeros, if applicable */
869 if ( longest_start ) {
870 dest = strcpy ( ( longest_start + 1 ),
871 ( longest_start + ( 2 * longest_len ) ) );
872 if ( dest[0] == '\0' )
873 dest[1] = '\0';
874 dest[0] = ':';
875 }
876 return ( ( longest_start == buf ) ? buf : ( buf + 1 ) );
877 }
878
/**
 * Transcribe IPv6 address
 *
 * @v net_addr		IPv6 address
 * @ret string		IPv6 address in standard notation
 *
 * Thin adapter that passes the untyped network address on to
 * inet6_ntoa().
 */
static const char * ipv6_ntoa ( const void *net_addr ) {
	const struct in6_addr *in = net_addr;

	return inet6_ntoa ( in );
}
889
890 /**
891 * Transcribe IPv6 socket address
892 *
893 * @v sa Socket address
894 * @ret string Socket address in standard notation
895 */
896 static const char * ipv6_sock_ntoa ( struct sockaddr *sa ) {
897 static char buf[ 39 /* "xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx" */ +
898 1 /* "%" */ + NETDEV_NAME_LEN + 1 /* NUL */ ];
899 struct sockaddr_in6 *sin6 = ( ( struct sockaddr_in6 * ) sa );
900 struct in6_addr *in = &sin6->sin6_addr;
901 struct net_device *netdev;
902 const char *netdev_name;
903
904 /* Identify network device, if applicable */
905 if ( IN6_IS_ADDR_NONGLOBAL ( in ) ) {
906 netdev = find_netdev_by_index ( sin6->sin6_scope_id );
907 netdev_name = ( netdev ? netdev->name : "UNKNOWN" );
908 } else {
909 netdev_name = NULL;
910 }
911
912 /* Format socket address */
913 snprintf ( buf, sizeof ( buf ), "%s%s%s", inet6_ntoa ( in ),
914 ( netdev_name ? "%" : "" ),
915 ( netdev_name ? netdev_name : "" ) );
916 return buf;
917 }
918
919 /**
920 * Parse IPv6 socket address
921 *
922 * @v string Socket address string
923 * @v sa Socket address to fill in
924 * @ret rc Return status code
925 */
926 static int ipv6_sock_aton ( const char *string, struct sockaddr *sa ) {
927 struct sockaddr_in6 *sin6 = ( ( struct sockaddr_in6 * ) sa );
928 struct in6_addr in;
929 struct net_device *netdev;
930 size_t len;
931 char *tmp;
932 char *in_string;
933 char *netdev_string;
934 int rc;
935
936 /* Create modifiable copy of string */
937 tmp = strdup ( string );
938 if ( ! tmp ) {
939 rc = -ENOMEM;
940 goto err_alloc;
941 }
942 in_string = tmp;
943
944 /* Strip surrounding "[...]", if present */
945 len = strlen ( in_string );
946 if ( ( in_string[0] == '[' ) && ( in_string[ len - 1 ] == ']' ) ) {
947 in_string[ len - 1 ] = '\0';
948 in_string++;
949 }
950
951 /* Split at network device name, if present */
952 netdev_string = strchr ( in_string, '%' );
953 if ( netdev_string )
954 *(netdev_string++) = '\0';
955
956 /* Parse IPv6 address portion */
957 if ( ( rc = inet6_aton ( in_string, &in ) ) != 0 )
958 goto err_inet6_aton;
959
960 /* Parse scope ID, if applicable */
961 if ( netdev_string ) {
962
963 /* Parse explicit network device name, if present */
964 netdev = find_netdev ( netdev_string );
965 if ( ! netdev ) {
966 rc = -ENODEV;
967 goto err_find_netdev;
968 }
969 sin6->sin6_scope_id = netdev->index;
970
971 } else if ( IN6_IS_ADDR_NONGLOBAL ( &in ) ) {
972
973 /* If no network device is explicitly specified for a
974 * link-local or multicast address, default to using
975 * "netX" (if existent).
976 */
977 netdev = last_opened_netdev();
978 if ( netdev )
979 sin6->sin6_scope_id = netdev->index;
980 }
981
982 /* Copy IPv6 address portion to socket address */
983 memcpy ( &sin6->sin6_addr, &in, sizeof ( sin6->sin6_addr ) );
984
985 err_find_netdev:
986 err_inet6_aton:
987 free ( tmp );
988 err_alloc:
989 return rc;
990 }
991
992 /** IPv6 protocol */
993 struct net_protocol ipv6_protocol __net_protocol = {
994 .name = "IPv6",
995 .net_proto = htons ( ETH_P_IPV6 ),
996 .net_addr_len = sizeof ( struct in6_addr ),
997 .rx = ipv6_rx,
998 .ntoa = ipv6_ntoa,
999 };
1000
1001 /** IPv6 TCPIP net protocol */
1002 struct tcpip_net_protocol ipv6_tcpip_protocol __tcpip_net_protocol = {
1003 .name = "IPv6",
1004 .sa_family = AF_INET6,
1005 .header_len = sizeof ( struct ipv6_header ),
1006 .net_protocol = &ipv6_protocol,
1007 .tx = ipv6_tx,
1008 .netdev = ipv6_netdev,
1009 };
1010
1011 /** IPv6 socket address converter */
1012 struct sockaddr_converter ipv6_sockaddr_converter __sockaddr_converter = {
1013 .family = AF_INET6,
1014 .ntoa = ipv6_sock_ntoa,
1015 .aton = ipv6_sock_aton,
1016 };
1017
1018 /**
1019 * Parse IPv6 address setting value
1020 *
1021 * @v type Setting type
1022 * @v value Formatted setting value
1023 * @v buf Buffer to contain raw value
1024 * @v len Length of buffer
1025 * @ret len Length of raw value, or negative error
1026 */
1027 int parse_ipv6_setting ( const struct setting_type *type __unused,
1028 const char *value, void *buf, size_t len ) {
1029 struct in6_addr ipv6;
1030 int rc;
1031
1032 /* Parse IPv6 address */
1033 if ( ( rc = inet6_aton ( value, &ipv6 ) ) != 0 )
1034 return rc;
1035
1036 /* Copy to buffer */
1037 if ( len > sizeof ( ipv6 ) )
1038 len = sizeof ( ipv6 );
1039 memcpy ( buf, &ipv6, len );
1040
1041 return ( sizeof ( ipv6 ) );
1042 }
1043
1044 /**
1045 * Format IPv6 address setting value
1046 *
1047 * @v type Setting type
1048 * @v raw Raw setting value
1049 * @v raw_len Length of raw setting value
1050 * @v buf Buffer to contain formatted value
1051 * @v len Length of buffer
1052 * @ret len Length of formatted value, or negative error
1053 */
1054 int format_ipv6_setting ( const struct setting_type *type __unused,
1055 const void *raw, size_t raw_len, char *buf,
1056 size_t len ) {
1057 const struct in6_addr *ipv6 = raw;
1058
1059 if ( raw_len < sizeof ( *ipv6 ) )
1060 return -EINVAL;
1061 return snprintf ( buf, len, "%s", inet6_ntoa ( ipv6 ) );
1062 }
1063
1064 /**
1065 * Create IPv6 network device
1066 *
1067 * @v netdev Network device
1068 * @ret rc Return status code
1069 */
1070 static int ipv6_probe ( struct net_device *netdev ) {
1071 struct ipv6_miniroute *miniroute;
1072 struct in6_addr address;
1073 int prefix_len;
1074 int rc;
1075
1076 /* Construct link-local address from EUI-64 as per RFC 2464 */
1077 memset ( &address, 0, sizeof ( address ) );
1078 prefix_len = ipv6_link_local ( &address, netdev );
1079 if ( prefix_len < 0 ) {
1080 rc = prefix_len;
1081 DBGC ( netdev, "IPv6 %s could not construct link-local "
1082 "address: %s\n", netdev->name, strerror ( rc ) );
1083 return rc;
1084 }
1085
1086 /* Create link-local address for this network device */
1087 miniroute = ipv6_add_miniroute ( netdev, &address, prefix_len,
1088 IPV6_HAS_ADDRESS );
1089 if ( ! miniroute )
1090 return -ENOMEM;
1091
1092 return 0;
1093 }
1094
1095 /**
1096 * Destroy IPv6 network device
1097 *
1098 * @v netdev Network device
1099 */
1100 static void ipv6_remove ( struct net_device *netdev ) {
1101 struct ipv6_miniroute *miniroute;
1102 struct ipv6_miniroute *tmp;
1103
1104 /* Delete all miniroutes for this network device */
1105 list_for_each_entry_safe ( miniroute, tmp, &ipv6_miniroutes, list ) {
1106 if ( miniroute->netdev == netdev ) {
1107 netdev_put ( miniroute->netdev );
1108 list_del ( &miniroute->list );
1109 free ( miniroute );
1110 }
1111 }
1112 }
1113
1114 /** IPv6 network device driver */
1115 struct net_driver ipv6_driver __net_driver = {
1116 .name = "IPv6",
1117 .probe = ipv6_probe,
1118 .remove = ipv6_remove,
1119 };
1120
/* Objects below are dragged in via ipv6_protocol */
REQUIRING_SYMBOL ( ipv6_protocol );

/* Drag in NDP */
REQUIRE_OBJECT ( ndp );

/* Drag in ICMPv6 */
REQUIRE_OBJECT ( icmpv6 );