net_pkt: Extend packet abstraction as required by e1000e functionality
[qemu.git] / hw / net / net_rx_pkt.c
1 /*
2 * QEMU RX packets abstractions
3 *
4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5 *
6 * Developed by Daynix Computing LTD (http://www.daynix.com)
7 *
8 * Authors:
9 * Dmitry Fleytman <dmitry@daynix.com>
10 * Tamir Shomer <tamirs@daynix.com>
11 * Yan Vugenfirer <yan@daynix.com>
12 *
13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
14 * See the COPYING file in the top-level directory.
15 *
16 */
17
18 #include "qemu/osdep.h"
19 #include "trace.h"
20 #include "net_rx_pkt.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23
24 struct NetRxPkt {
25 struct virtio_net_hdr virt_hdr;
26 uint8_t ehdr_buf[sizeof(struct eth_header)];
27 struct iovec *vec;
28 uint16_t vec_len_total;
29 uint16_t vec_len;
30 uint32_t tot_len;
31 uint16_t tci;
32 bool vlan_stripped;
33 bool has_virt_hdr;
34 eth_pkt_types_e packet_type;
35
36 /* Analysis results */
37 bool isip4;
38 bool isip6;
39 bool isudp;
40 bool istcp;
41
42 size_t l3hdr_off;
43 size_t l4hdr_off;
44 size_t l5hdr_off;
45
46 eth_ip6_hdr_info ip6hdr_info;
47 eth_ip4_hdr_info ip4hdr_info;
48 eth_l4_hdr_info l4hdr_info;
49 };
50
51 void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr)
52 {
53 struct NetRxPkt *p = g_malloc0(sizeof *p);
54 p->has_virt_hdr = has_virt_hdr;
55 p->vec = NULL;
56 p->vec_len_total = 0;
57 *pkt = p;
58 }
59
60 void net_rx_pkt_uninit(struct NetRxPkt *pkt)
61 {
62 if (pkt->vec_len_total != 0) {
63 g_free(pkt->vec);
64 }
65
66 g_free(pkt);
67 }
68
69 struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt)
70 {
71 assert(pkt);
72 return &pkt->virt_hdr;
73 }
74
75 static inline void
76 net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt,
77 int new_iov_len)
78 {
79 if (pkt->vec_len_total < new_iov_len) {
80 g_free(pkt->vec);
81 pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len);
82 pkt->vec_len_total = new_iov_len;
83 }
84 }
85
86 static void
87 net_rx_pkt_pull_data(struct NetRxPkt *pkt,
88 const struct iovec *iov, int iovcnt,
89 size_t ploff)
90 {
91 if (pkt->vlan_stripped) {
92 net_rx_pkt_iovec_realloc(pkt, iovcnt + 1);
93
94 pkt->vec[0].iov_base = pkt->ehdr_buf;
95 pkt->vec[0].iov_len = sizeof(pkt->ehdr_buf);
96
97 pkt->tot_len =
98 iov_size(iov, iovcnt) - ploff + sizeof(struct eth_header);
99
100 pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1,
101 iov, iovcnt, ploff, pkt->tot_len);
102 } else {
103 net_rx_pkt_iovec_realloc(pkt, iovcnt);
104
105 pkt->tot_len = iov_size(iov, iovcnt) - ploff;
106 pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total,
107 iov, iovcnt, ploff, pkt->tot_len);
108 }
109
110 eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6,
111 &pkt->isudp, &pkt->istcp,
112 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
113 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
114
115 trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp,
116 pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off);
117 }
118
119 void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
120 const struct iovec *iov, int iovcnt,
121 size_t iovoff, bool strip_vlan)
122 {
123 uint16_t tci = 0;
124 uint16_t ploff = iovoff;
125 assert(pkt);
126 pkt->vlan_stripped = false;
127
128 if (strip_vlan) {
129 pkt->vlan_stripped = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf,
130 &ploff, &tci);
131 }
132
133 pkt->tci = tci;
134
135 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
136 }
137
138 void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt,
139 const struct iovec *iov, int iovcnt,
140 size_t iovoff, bool strip_vlan,
141 uint16_t vet)
142 {
143 uint16_t tci = 0;
144 uint16_t ploff = iovoff;
145 assert(pkt);
146 pkt->vlan_stripped = false;
147
148 if (strip_vlan) {
149 pkt->vlan_stripped = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet,
150 pkt->ehdr_buf,
151 &ploff, &tci);
152 }
153
154 pkt->tci = tci;
155
156 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
157 }
158
/*
 * Print a one-line summary of the packet to stdout.
 *
 * @pkt: packet context, must not be NULL when debugging is enabled
 *
 * The body is compiled only when NET_RX_PKT_DEBUG is defined; otherwise
 * the function does nothing.
 */
void net_rx_pkt_dump(struct NetRxPkt *pkt)
{
#ifdef NET_RX_PKT_DEBUG
    assert(pkt);

    /* tot_len is uint32_t; the bool and uint16_t fields promote to int. */
    printf("RX PKT: tot_len: %" PRIu32 ", vlan_stripped: %d, vlan_tag: %d\n",
           pkt->tot_len, pkt->vlan_stripped, pkt->tci);
#endif
}
169
170 void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt,
171 eth_pkt_types_e packet_type)
172 {
173 assert(pkt);
174
175 pkt->packet_type = packet_type;
176
177 }
178
179 eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt)
180 {
181 assert(pkt);
182
183 return pkt->packet_type;
184 }
185
186 size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt)
187 {
188 assert(pkt);
189
190 return pkt->tot_len;
191 }
192
193 void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data,
194 size_t len)
195 {
196 const struct iovec iov = {
197 .iov_base = (void *)data,
198 .iov_len = len
199 };
200
201 assert(pkt);
202
203 eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6,
204 &pkt->isudp, &pkt->istcp,
205 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
206 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
207 }
208
209 void net_rx_pkt_get_protocols(struct NetRxPkt *pkt,
210 bool *isip4, bool *isip6,
211 bool *isudp, bool *istcp)
212 {
213 assert(pkt);
214
215 *isip4 = pkt->isip4;
216 *isip6 = pkt->isip6;
217 *isudp = pkt->isudp;
218 *istcp = pkt->istcp;
219 }
220
221 size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt)
222 {
223 assert(pkt);
224 return pkt->l3hdr_off;
225 }
226
227 size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt)
228 {
229 assert(pkt);
230 return pkt->l4hdr_off;
231 }
232
233 size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt)
234 {
235 assert(pkt);
236 return pkt->l5hdr_off;
237 }
238
239 eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt)
240 {
241 return &pkt->ip6hdr_info;
242 }
243
244 eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt)
245 {
246 return &pkt->ip4hdr_info;
247 }
248
249 eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt)
250 {
251 return &pkt->l4hdr_info;
252 }
253
/*
 * Append @size bytes from @ptr to the RSS input buffer.
 *
 * @rss_input:     destination buffer
 * @bytes_written: in/out count of bytes already in @rss_input; advanced
 *                 by @size
 * @ptr:           source bytes
 * @size:          number of bytes to append
 *
 * No bounds check is performed here; the caller sizes @rss_input.
 */
static inline void
_net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written,
                      void *ptr, size_t size)
{
    size_t pos = *bytes_written;

    memcpy(rss_input + pos, ptr, size);
    trace_net_rx_pkt_rss_add_chunk(ptr, size, pos);

    *bytes_written = pos + size;
}
262
263 static inline void
264 _net_rx_rss_prepare_ip4(uint8_t *rss_input,
265 struct NetRxPkt *pkt,
266 size_t *bytes_written)
267 {
268 struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr;
269
270 _net_rx_rss_add_chunk(rss_input, bytes_written,
271 &ip4_hdr->ip_src, sizeof(uint32_t));
272
273 _net_rx_rss_add_chunk(rss_input, bytes_written,
274 &ip4_hdr->ip_dst, sizeof(uint32_t));
275 }
276
277 static inline void
278 _net_rx_rss_prepare_ip6(uint8_t *rss_input,
279 struct NetRxPkt *pkt,
280 bool ipv6ex, size_t *bytes_written)
281 {
282 eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info;
283
284 _net_rx_rss_add_chunk(rss_input, bytes_written,
285 (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src
286 : &ip6info->ip6_hdr.ip6_src,
287 sizeof(struct in6_address));
288
289 _net_rx_rss_add_chunk(rss_input, bytes_written,
290 (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst
291 : &ip6info->ip6_hdr.ip6_dst,
292 sizeof(struct in6_address));
293 }
294
295 static inline void
296 _net_rx_rss_prepare_tcp(uint8_t *rss_input,
297 struct NetRxPkt *pkt,
298 size_t *bytes_written)
299 {
300 struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp;
301
302 _net_rx_rss_add_chunk(rss_input, bytes_written,
303 &tcphdr->th_sport, sizeof(uint16_t));
304
305 _net_rx_rss_add_chunk(rss_input, bytes_written,
306 &tcphdr->th_dport, sizeof(uint16_t));
307 }
308
309 uint32_t
310 net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
311 NetRxPktRssType type,
312 uint8_t *key)
313 {
314 uint8_t rss_input[36];
315 size_t rss_length = 0;
316 uint32_t rss_hash = 0;
317 net_toeplitz_key key_data;
318
319 switch (type) {
320 case NetPktRssIpV4:
321 assert(pkt->isip4);
322 trace_net_rx_pkt_rss_ip4();
323 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
324 break;
325 case NetPktRssIpV4Tcp:
326 assert(pkt->isip4);
327 assert(pkt->istcp);
328 trace_net_rx_pkt_rss_ip4_tcp();
329 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
330 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
331 break;
332 case NetPktRssIpV6Tcp:
333 assert(pkt->isip6);
334 assert(pkt->istcp);
335 trace_net_rx_pkt_rss_ip6_tcp();
336 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
337 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
338 break;
339 case NetPktRssIpV6:
340 assert(pkt->isip6);
341 trace_net_rx_pkt_rss_ip6();
342 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
343 break;
344 case NetPktRssIpV6Ex:
345 assert(pkt->isip6);
346 trace_net_rx_pkt_rss_ip6_ex();
347 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
348 break;
349 default:
350 assert(false);
351 break;
352 }
353
354 net_toeplitz_key_init(&key_data, key);
355 net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data);
356
357 trace_net_rx_pkt_rss_hash(rss_length, rss_hash);
358
359 return rss_hash;
360 }
361
362 uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt)
363 {
364 assert(pkt);
365
366 if (pkt->isip4) {
367 return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id);
368 }
369
370 return 0;
371 }
372
373 bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt)
374 {
375 assert(pkt);
376
377 if (pkt->istcp) {
378 return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK;
379 }
380
381 return false;
382 }
383
384 bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt)
385 {
386 assert(pkt);
387
388 if (pkt->istcp) {
389 return pkt->l4hdr_info.has_tcp_data;
390 }
391
392 return false;
393 }
394
395 struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt)
396 {
397 assert(pkt);
398
399 return pkt->vec;
400 }
401
402 uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt)
403 {
404 assert(pkt);
405
406 return pkt->vec_len;
407 }
408
409 void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
410 struct virtio_net_hdr *vhdr)
411 {
412 assert(pkt);
413
414 memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr);
415 }
416
417 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
418 const struct iovec *iov, int iovcnt)
419 {
420 assert(pkt);
421
422 iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr);
423 }
424
425 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
426 {
427 assert(pkt);
428
429 return pkt->vlan_stripped;
430 }
431
432 bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt)
433 {
434 assert(pkt);
435
436 return pkt->has_virt_hdr;
437 }
438
439 uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt)
440 {
441 assert(pkt);
442
443 return pkt->tci;
444 }
445
446 bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid)
447 {
448 uint32_t cntr;
449 uint16_t csum;
450 uint32_t csl;
451
452 trace_net_rx_pkt_l3_csum_validate_entry();
453
454 if (!pkt->isip4) {
455 trace_net_rx_pkt_l3_csum_validate_not_ip4();
456 return false;
457 }
458
459 csl = pkt->l4hdr_off - pkt->l3hdr_off;
460
461 cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len,
462 pkt->l3hdr_off,
463 csl, 0);
464
465 csum = net_checksum_finish(cntr);
466
467 *csum_valid = (csum == 0);
468
469 trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl,
470 cntr, csum, *csum_valid);
471
472 return true;
473 }
474
475 static uint16_t
476 _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt)
477 {
478 uint32_t cntr;
479 uint16_t csum;
480 uint16_t csl;
481 uint32_t cso;
482
483 trace_net_rx_pkt_l4_csum_calc_entry();
484
485 if (pkt->isip4) {
486 if (pkt->isudp) {
487 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
488 trace_net_rx_pkt_l4_csum_calc_ip4_udp();
489 } else {
490 csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) -
491 IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr);
492 trace_net_rx_pkt_l4_csum_calc_ip4_tcp();
493 }
494
495 cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr,
496 csl, &cso);
497 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl);
498 } else {
499 if (pkt->isudp) {
500 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
501 trace_net_rx_pkt_l4_csum_calc_ip6_udp();
502 } else {
503 struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr;
504 size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off;
505 size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
506
507 csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) -
508 ip6opts_len;
509 trace_net_rx_pkt_l4_csum_calc_ip6_tcp();
510 }
511
512 cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl,
513 pkt->ip6hdr_info.l4proto, &cso);
514 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl);
515 }
516
517 cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len,
518 pkt->l4hdr_off, csl, cso);
519
520 csum = net_checksum_finish(cntr);
521
522 trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum);
523
524 return csum;
525 }
526
527 bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid)
528 {
529 uint16_t csum;
530
531 trace_net_rx_pkt_l4_csum_validate_entry();
532
533 if (!pkt->istcp && !pkt->isudp) {
534 trace_net_rx_pkt_l4_csum_validate_not_xxp();
535 return false;
536 }
537
538 if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) {
539 trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
540 return false;
541 }
542
543 if (pkt->isip4 && pkt->ip4hdr_info.fragment) {
544 trace_net_rx_pkt_l4_csum_validate_ip4_fragment();
545 return false;
546 }
547
548 csum = _net_rx_pkt_calc_l4_csum(pkt);
549
550 *csum_valid = ((csum == 0) || (csum == 0xFFFF));
551
552 trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid);
553
554 return true;
555 }
556
557 bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt)
558 {
559 uint16_t csum = 0;
560 uint32_t l4_cso;
561
562 trace_net_rx_pkt_l4_csum_fix_entry();
563
564 if (pkt->istcp) {
565 l4_cso = offsetof(struct tcp_header, th_sum);
566 trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso);
567 } else if (pkt->isudp) {
568 if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) {
569 trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum();
570 return false;
571 }
572 l4_cso = offsetof(struct udp_header, uh_sum);
573 trace_net_rx_pkt_l4_csum_fix_udp(l4_cso);
574 } else {
575 trace_net_rx_pkt_l4_csum_fix_not_xxp();
576 return false;
577 }
578
579 if (pkt->isip4 && pkt->ip4hdr_info.fragment) {
580 trace_net_rx_pkt_l4_csum_fix_ip4_fragment();
581 return false;
582 }
583
584 /* Set zero to checksum word */
585 iov_from_buf(pkt->vec, pkt->vec_len,
586 pkt->l4hdr_off + l4_cso,
587 &csum, sizeof(csum));
588
589 /* Calculate L4 checksum */
590 csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt));
591
592 /* Set calculated checksum to checksum word */
593 iov_from_buf(pkt->vec, pkt->vec_len,
594 pkt->l4hdr_off + l4_cso,
595 &csum, sizeof(csum));
596
597 trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum);
598
599 return true;
600 }