Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging
[qemu.git] / net / eth.c
1 /*
2 * QEMU network structures definitions and helper functions
3 *
4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5 *
6 * Developed by Daynix Computing LTD (http://www.daynix.com)
7 *
8 * Authors:
9 * Dmitry Fleytman <dmitry@daynix.com>
10 * Tamir Shomer <tamirs@daynix.com>
11 * Yan Vugenfirer <yan@daynix.com>
12 *
13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
14 * See the COPYING file in the top-level directory.
15 *
16 */
17
18 #include "qemu/osdep.h"
19 #include "net/eth.h"
20 #include "net/checksum.h"
21 #include "qemu-common.h"
22 #include "net/tap.h"
23
24 void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
25 uint16_t vlan_ethtype, bool *is_new)
26 {
27 struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
28
29 switch (be16_to_cpu(ehdr->h_proto)) {
30 case ETH_P_VLAN:
31 case ETH_P_DVLAN:
32 /* vlan hdr exists */
33 *is_new = false;
34 break;
35
36 default:
37 /* No VLAN header, put a new one */
38 vhdr->h_proto = ehdr->h_proto;
39 ehdr->h_proto = cpu_to_be16(vlan_ethtype);
40 *is_new = true;
41 break;
42 }
43 vhdr->h_tci = cpu_to_be16(vlan_tag);
44 }
45
46 uint8_t
47 eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
48 {
49 uint8_t ecn_state = 0;
50
51 if (l3_proto == ETH_P_IP) {
52 struct ip_header *iphdr = (struct ip_header *) l3_hdr;
53
54 if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
55 if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
56 ecn_state = VIRTIO_NET_HDR_GSO_ECN;
57 }
58 if (l4proto == IP_PROTO_TCP) {
59 return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
60 } else if (l4proto == IP_PROTO_UDP) {
61 return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
62 }
63 }
64 } else if (l3_proto == ETH_P_IPV6) {
65 struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
66
67 if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
68 ecn_state = VIRTIO_NET_HDR_GSO_ECN;
69 }
70
71 if (l4proto == IP_PROTO_TCP) {
72 return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
73 }
74 }
75
76 /* Unsupported offload */
77 g_assert_not_reached();
78
79 return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
80 }
81
82 uint16_t
83 eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
84 {
85 uint16_t proto;
86 size_t copied;
87 size_t size = iov_size(l2hdr_iov, iovcnt);
88 size_t proto_offset = l2hdr_len - sizeof(proto);
89
90 if (size < proto_offset) {
91 return ETH_P_UNKNOWN;
92 }
93
94 copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
95 &proto, sizeof(proto));
96
97 return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
98 }
99
/*
 * Copy @length bytes starting at @offset out of an iovec into @buffer.
 *
 * @input_size: total size of the iovec data; used to reject an @offset
 *              that lies beyond the end before touching the iovec
 *
 * Returns true only if all @length bytes were copied.
 */
static bool
_eth_copy_chunk(size_t input_size,
                const struct iovec *iov, int iovcnt,
                size_t offset, size_t length,
                void *buffer)
{
    return input_size >= offset &&
           iov_to_buf(iov, iovcnt, offset, buffer, length) >= length;
}
120
121 static bool
122 _eth_tcp_has_data(bool is_ip4,
123 const struct ip_header *ip4_hdr,
124 const struct ip6_header *ip6_hdr,
125 size_t full_ip6hdr_len,
126 const struct tcp_header *tcp)
127 {
128 uint32_t l4len;
129
130 if (is_ip4) {
131 l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
132 } else {
133 size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
134 l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
135 }
136
137 return l4len > TCP_HEADER_DATA_OFFSET(tcp);
138 }
139
140 void eth_get_protocols(const struct iovec *iov, int iovcnt,
141 bool *isip4, bool *isip6,
142 bool *isudp, bool *istcp,
143 size_t *l3hdr_off,
144 size_t *l4hdr_off,
145 size_t *l5hdr_off,
146 eth_ip6_hdr_info *ip6hdr_info,
147 eth_ip4_hdr_info *ip4hdr_info,
148 eth_l4_hdr_info *l4hdr_info)
149 {
150 int proto;
151 bool fragment = false;
152 size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
153 size_t input_size = iov_size(iov, iovcnt);
154 size_t copied;
155
156 *isip4 = *isip6 = *isudp = *istcp = false;
157
158 proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
159
160 *l3hdr_off = l2hdr_len;
161
162 if (proto == ETH_P_IP) {
163 struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
164
165 if (input_size < l2hdr_len) {
166 return;
167 }
168
169 copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
170
171 *isip4 = true;
172
173 if (copied < sizeof(*iphdr)) {
174 return;
175 }
176
177 if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
178 if (iphdr->ip_p == IP_PROTO_TCP) {
179 *istcp = true;
180 } else if (iphdr->ip_p == IP_PROTO_UDP) {
181 *isudp = true;
182 }
183 }
184
185 ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
186 *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
187
188 fragment = ip4hdr_info->fragment;
189 } else if (proto == ETH_P_IPV6) {
190
191 *isip6 = true;
192 if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
193 ip6hdr_info)) {
194 if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
195 *istcp = true;
196 } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
197 *isudp = true;
198 }
199 } else {
200 return;
201 }
202
203 *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
204 fragment = ip6hdr_info->fragment;
205 }
206
207 if (!fragment) {
208 if (*istcp) {
209 *istcp = _eth_copy_chunk(input_size,
210 iov, iovcnt,
211 *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
212 &l4hdr_info->hdr.tcp);
213
214 if (*istcp) {
215 *l5hdr_off = *l4hdr_off +
216 TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
217
218 l4hdr_info->has_tcp_data =
219 _eth_tcp_has_data(proto == ETH_P_IP,
220 &ip4hdr_info->ip4_hdr,
221 &ip6hdr_info->ip6_hdr,
222 *l4hdr_off - *l3hdr_off,
223 &l4hdr_info->hdr.tcp);
224 }
225 } else if (*isudp) {
226 *isudp = _eth_copy_chunk(input_size,
227 iov, iovcnt,
228 *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
229 &l4hdr_info->hdr.udp);
230 *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
231 }
232 }
233 }
234
235 size_t
236 eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
237 uint8_t *new_ehdr_buf,
238 uint16_t *payload_offset, uint16_t *tci)
239 {
240 struct vlan_header vlan_hdr;
241 struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
242
243 size_t copied = iov_to_buf(iov, iovcnt, iovoff,
244 new_ehdr, sizeof(*new_ehdr));
245
246 if (copied < sizeof(*new_ehdr)) {
247 return 0;
248 }
249
250 switch (be16_to_cpu(new_ehdr->h_proto)) {
251 case ETH_P_VLAN:
252 case ETH_P_DVLAN:
253 copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
254 &vlan_hdr, sizeof(vlan_hdr));
255
256 if (copied < sizeof(vlan_hdr)) {
257 return 0;
258 }
259
260 new_ehdr->h_proto = vlan_hdr.h_proto;
261
262 *tci = be16_to_cpu(vlan_hdr.h_tci);
263 *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
264
265 if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
266
267 copied = iov_to_buf(iov, iovcnt, *payload_offset,
268 PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
269
270 if (copied < sizeof(vlan_hdr)) {
271 return 0;
272 }
273
274 *payload_offset += sizeof(vlan_hdr);
275
276 return sizeof(struct eth_header) + sizeof(struct vlan_header);
277 } else {
278 return sizeof(struct eth_header);
279 }
280 default:
281 return 0;
282 }
283 }
284
285 size_t
286 eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
287 uint16_t vet, uint8_t *new_ehdr_buf,
288 uint16_t *payload_offset, uint16_t *tci)
289 {
290 struct vlan_header vlan_hdr;
291 struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
292
293 size_t copied = iov_to_buf(iov, iovcnt, iovoff,
294 new_ehdr, sizeof(*new_ehdr));
295
296 if (copied < sizeof(*new_ehdr)) {
297 return 0;
298 }
299
300 if (be16_to_cpu(new_ehdr->h_proto) == vet) {
301 copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
302 &vlan_hdr, sizeof(vlan_hdr));
303
304 if (copied < sizeof(vlan_hdr)) {
305 return 0;
306 }
307
308 new_ehdr->h_proto = vlan_hdr.h_proto;
309
310 *tci = be16_to_cpu(vlan_hdr.h_tci);
311 *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
312 return sizeof(struct eth_header);
313 }
314
315 return 0;
316 }
317
318 void
319 eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
320 void *l3hdr, size_t l3hdr_len,
321 size_t l3payload_len,
322 size_t frag_offset, bool more_frags)
323 {
324 const struct iovec l2vec = {
325 .iov_base = (void *) l2hdr,
326 .iov_len = l2hdr_len
327 };
328
329 if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
330 uint16_t orig_flags;
331 struct ip_header *iphdr = (struct ip_header *) l3hdr;
332 uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
333 uint16_t new_ip_off;
334
335 assert(frag_offset % IP_FRAG_UNIT_SIZE == 0);
336 assert((frag_off_units & ~IP_OFFMASK) == 0);
337
338 orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF);
339 new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0);
340 iphdr->ip_off = cpu_to_be16(new_ip_off);
341 iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
342 }
343 }
344
345 void
346 eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
347 {
348 struct ip_header *iphdr = (struct ip_header *) l3hdr;
349 iphdr->ip_sum = 0;
350 iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
351 }
352
353 uint32_t
354 eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
355 uint16_t csl,
356 uint32_t *cso)
357 {
358 struct ip_pseudo_header ipph;
359 ipph.ip_src = iphdr->ip_src;
360 ipph.ip_dst = iphdr->ip_dst;
361 ipph.ip_payload = cpu_to_be16(csl);
362 ipph.ip_proto = iphdr->ip_p;
363 ipph.zeros = 0;
364 *cso = sizeof(ipph);
365 return net_checksum_add(*cso, (uint8_t *) &ipph);
366 }
367
368 uint32_t
369 eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
370 uint16_t csl,
371 uint8_t l4_proto,
372 uint32_t *cso)
373 {
374 struct ip6_pseudo_header ipph;
375 ipph.ip6_src = iphdr->ip6_src;
376 ipph.ip6_dst = iphdr->ip6_dst;
377 ipph.len = cpu_to_be16(csl);
378 ipph.zero[0] = 0;
379 ipph.zero[1] = 0;
380 ipph.zero[2] = 0;
381 ipph.next_hdr = l4_proto;
382 *cso = sizeof(ipph);
383 return net_checksum_add(*cso, (uint8_t *)&ipph);
384 }
385
386 static bool
387 eth_is_ip6_extension_header_type(uint8_t hdr_type)
388 {
389 switch (hdr_type) {
390 case IP6_HOP_BY_HOP:
391 case IP6_ROUTING:
392 case IP6_FRAGMENT:
393 case IP6_ESP:
394 case IP6_AUTHENTICATION:
395 case IP6_DESTINATON:
396 case IP6_MOBILITY:
397 return true;
398 default:
399 return false;
400 }
401 }
402
403 static bool
404 _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
405 size_t rthdr_offset,
406 struct ip6_ext_hdr *ext_hdr,
407 struct in6_address *dst_addr)
408 {
409 struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
410
411 if ((rthdr->rtype == 2) &&
412 (rthdr->len == sizeof(struct in6_address) / 8) &&
413 (rthdr->segleft == 1)) {
414
415 size_t input_size = iov_size(pkt, pkt_frags);
416 size_t bytes_read;
417
418 if (input_size < rthdr_offset + sizeof(*ext_hdr)) {
419 return false;
420 }
421
422 bytes_read = iov_to_buf(pkt, pkt_frags,
423 rthdr_offset + sizeof(*ext_hdr),
424 dst_addr, sizeof(*dst_addr));
425
426 return bytes_read == sizeof(dst_addr);
427 }
428
429 return false;
430 }
431
432 static bool
433 _eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
434 size_t dsthdr_offset,
435 struct ip6_ext_hdr *ext_hdr,
436 struct in6_address *src_addr)
437 {
438 size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
439 struct ip6_option_hdr opthdr;
440 size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
441
442 while (bytes_left > sizeof(opthdr)) {
443 size_t input_size = iov_size(pkt, pkt_frags);
444 size_t bytes_read, optlen;
445
446 if (input_size < opt_offset) {
447 return false;
448 }
449
450 bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
451 &opthdr, sizeof(opthdr));
452
453 if (bytes_read != sizeof(opthdr)) {
454 return false;
455 }
456
457 optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
458 : (opthdr.len + sizeof(opthdr));
459
460 if (optlen > bytes_left) {
461 return false;
462 }
463
464 if (opthdr.type == IP6_OPT_HOME) {
465 size_t input_size = iov_size(pkt, pkt_frags);
466
467 if (input_size < opt_offset + sizeof(opthdr)) {
468 return false;
469 }
470
471 bytes_read = iov_to_buf(pkt, pkt_frags,
472 opt_offset + sizeof(opthdr),
473 src_addr, sizeof(*src_addr));
474
475 return bytes_read == sizeof(src_addr);
476 }
477
478 opt_offset += optlen;
479 bytes_left -= optlen;
480 }
481
482 return false;
483 }
484
485 bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
486 size_t ip6hdr_off, eth_ip6_hdr_info *info)
487 {
488 struct ip6_ext_hdr ext_hdr;
489 size_t bytes_read;
490 uint8_t curr_ext_hdr_type;
491 size_t input_size = iov_size(pkt, pkt_frags);
492
493 info->rss_ex_dst_valid = false;
494 info->rss_ex_src_valid = false;
495 info->fragment = false;
496
497 if (input_size < ip6hdr_off) {
498 return false;
499 }
500
501 bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
502 &info->ip6_hdr, sizeof(info->ip6_hdr));
503 if (bytes_read < sizeof(info->ip6_hdr)) {
504 return false;
505 }
506
507 info->full_hdr_len = sizeof(struct ip6_header);
508
509 curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
510
511 if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
512 info->l4proto = info->ip6_hdr.ip6_nxt;
513 info->has_ext_hdrs = false;
514 return true;
515 }
516
517 info->has_ext_hdrs = true;
518
519 do {
520 if (input_size < ip6hdr_off + info->full_hdr_len) {
521 return false;
522 }
523
524 bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
525 &ext_hdr, sizeof(ext_hdr));
526
527 if (bytes_read < sizeof(ext_hdr)) {
528 return false;
529 }
530
531 if (curr_ext_hdr_type == IP6_ROUTING) {
532 info->rss_ex_dst_valid =
533 _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
534 ip6hdr_off + info->full_hdr_len,
535 &ext_hdr, &info->rss_ex_dst);
536 } else if (curr_ext_hdr_type == IP6_DESTINATON) {
537 info->rss_ex_src_valid =
538 _eth_get_rss_ex_src_addr(pkt, pkt_frags,
539 ip6hdr_off + info->full_hdr_len,
540 &ext_hdr, &info->rss_ex_src);
541 } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
542 info->fragment = true;
543 }
544
545 info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
546 curr_ext_hdr_type = ext_hdr.ip6r_nxt;
547 } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
548
549 info->l4proto = ext_hdr.ip6r_nxt;
550 return true;
551 }