net/virtio: Fix failover_replug_primary() return value regression
[qemu.git] / hw / net / virtio-net.c
1 /*
2 * Virtio Network Device
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 #include "net_rx_pkt.h"
46
47 #define VIRTIO_NET_VM_VERSION 11
48
49 #define MAC_TABLE_ENTRIES 64
50 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
51
52 /* previously fixed value */
53 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
54 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
55
56 /* for now, only allow larger queues; with virtio-1, guest can downsize */
57 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
58 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
59
60 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */
61
62 #define VIRTIO_NET_TCP_FLAG 0x3F
63 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000
64
65 /* IPv4 max payload, 16 bits in the header */
66 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
67 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
68
69 /* header length value in ip header without option */
70 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
71
72 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
73 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
74
75 /* Purge coalesced packets timer interval, This value affects the performance
76 a lot, and should be tuned carefully, '300000'(300us) is the recommended
77 value to pass the WHQL test, '50000' can gain 2x netperf throughput with
78 tso/gso/gro 'off'. */
79 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
80
81 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
82 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
83 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
84 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
85 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
86 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
87 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
88 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
89 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
90
/*
 * Per-feature config-space sizing table: each entry records the offset one
 * past the last byte of struct virtio_net_config that a driver which
 * negotiated the given feature bit(s) may access.  Terminated by the
 * zero-filled sentinel entry.  Presumably consumed by the generic virtio
 * config-size helper to compute n->config_size -- the consumer is outside
 * this chunk, so confirm against hw/virtio/virtio.c.
 */
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
106
107 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
108 {
109 VirtIONet *n = qemu_get_nic_opaque(nc);
110
111 return &n->vqs[nc->queue_index];
112 }
113
/*
 * Map a virtqueue index to its queue-pair index: each pair owns two
 * consecutive virtqueues (RX then TX).
 */
static int vq2q(int vq_index)
{
    return vq_index / 2;
}
118
119 /* TODO
120 * - we could suppress RX interrupt if we were so inclined.
121 */
122
/*
 * Populate the device config space (struct virtio_net_config) shown to the
 * guest: link status, max queue pairs, MTU, MAC, speed/duplex and RSS
 * limits.  Multi-byte fields go through virtio_st*_p() so they use the
 * byte order negotiated with the guest.  Only the first n->config_size
 * bytes are copied out to @config.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    /* Without VIRTIO_NET_F_RSS only one indirection table entry is exposed. */
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);
}
142
/*
 * Guest write to config space.  The only writable field honoured here is
 * the MAC address, and only for legacy drivers: once the driver negotiated
 * VIRTIO_NET_F_CTRL_MAC_ADDR or VIRTIO_F_VERSION_1 it must set the MAC via
 * the control virtqueue instead, so such writes are ignored.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        /* Refresh the human-readable NIC info shown by the monitor. */
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}
157
/*
 * True when the device should be processing traffic for the given driver
 * @status byte: DRIVER_OK is set, the device-side link is up, and the VM
 * is actually running.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}
164
/*
 * Raise the ANNOUNCE bit in the device status and kick a config interrupt
 * so the guest sends a gratuitous-ARP style self-announcement; the guest
 * acks via VIRTIO_NET_CTRL_ANNOUNCE_ACK on the control queue.
 */
static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}
173
/*
 * Announce-timer callback (used e.g. after migration): consume one round
 * and prod the guest to re-announce itself.  The next round is scheduled
 * from the guest's ACK path in virtio_net_handle_announce().
 */
static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}
182
/*
 * Externally requested announce (NetClientInfo hook, e.g. from the
 * announce-self QMP command): ask the guest to self-announce, provided it
 * negotiated the guest-announce mechanism.
 */
static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}
202
/*
 * Start or stop the vhost backend so that it matches the desired device
 * state derived from @status.  No-op when there is no vhost backend or
 * when the backend is already in the wanted state.  On any start-path
 * failure we fall back to userspace virtio processing (vhost stays off).
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* Already in the desired running/stopped state? */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        /* vhost can't byte-swap vnet headers for the guest; stay in
         * userspace in that case. */
        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* Set the flag before starting so RX-path checks see vhost active. */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}
260
261 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
262 NetClientState *peer,
263 bool enable)
264 {
265 if (virtio_is_big_endian(vdev)) {
266 return qemu_set_vnet_be(peer, enable);
267 } else {
268 return qemu_set_vnet_le(peer, enable);
269 }
270 }
271
272 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
273 int queues, bool enable)
274 {
275 int i;
276
277 for (i = 0; i < queues; i++) {
278 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
279 enable) {
280 while (--i >= 0) {
281 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
282 }
283
284 return true;
285 }
286 }
287
288 return false;
289 }
290
/*
 * Keep the backends' vnet-header endianness in sync with the device
 * lifecycle: program it when the device starts, restore the default when
 * it stops.  Sets n->needs_vnet_hdr_swap when the backend cannot do the
 * swap itself.
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}
313
314 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
315 {
316 unsigned int dropped = virtqueue_drop_all(vq);
317 if (dropped) {
318 virtio_notify(vdev, vq);
319 }
320 }
321
/*
 * VirtioDeviceClass::set_status hook: propagate the driver status to the
 * endianness/vhost machinery, then bring each queue pair's userspace TX
 * path (timer or bottom half) into line with whether that pair is active.
 * Queues beyond curr_queues are treated as stopped.
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* Inactive queues get a zero status so they count as stopped. */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        /* Userspace processing only; vhost owns the rings otherwise. */
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
378
/*
 * NetClientInfo::link_status_changed hook: mirror the backend link state
 * into the device status, tell the guest via a config interrupt when it
 * changed, and re-evaluate the whole device state (vhost may need to
 * start/stop).
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}
395
/*
 * Emit the NIC_RX_FILTER_CHANGED QMP event after a guest-initiated RX
 * filter change.  One-shot: notification stays disabled until management
 * re-reads the filter via query-rx-filter (virtio_net_query_rxfilter()),
 * which prevents event flooding.
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
410
411 static intList *get_vlan_table(VirtIONet *n)
412 {
413 intList *list, *entry;
414 int i, j;
415
416 list = NULL;
417 for (i = 0; i < MAX_VLAN >> 5; i++) {
418 for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
419 if (n->vlans[i] & (1U << j)) {
420 entry = g_malloc0(sizeof(*entry));
421 entry->value = (i << 5) + j;
422 entry->next = list;
423 list = entry;
424 }
425 }
426 }
427
428 return list;
429 }
430
/*
 * NetClientInfo::query_rx_filter hook backing the query-rx-filter QMP
 * command: snapshot the current RX filter (promisc/uni/multi modes, MAC
 * tables, VLAN table) into a freshly allocated RxFilterInfo.  Also
 * re-arms the one-shot NIC_RX_FILTER_CHANGED event (see rxfilter_notify()).
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /* NOTE(review): this copies the "no broadcast" flag verbatim; the field
     * name suggests the inverse (!n->nobcast) -- confirm intended semantics
     * against the QMP schema and consumers before changing. */
    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* Unicast MACs occupy table slots [0, first_multi). */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    /* Multicast MACs occupy table slots [first_multi, in_use). */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
497
/*
 * VirtioDeviceClass::reset hook: return the device to its power-on state --
 * legacy promiscuous RX mode, single queue pair, no pending announce, empty
 * MAC/VLAN filters, configured MAC restored -- and purge any in-flight TX.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}
536
537 static void peer_test_vnet_hdr(VirtIONet *n)
538 {
539 NetClientState *nc = qemu_get_queue(n->nic);
540 if (!nc->peer) {
541 return;
542 }
543
544 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
545 }
546
/* Cached result of peer_test_vnet_hdr(): non-zero if the backend handles
 * packets carrying a vnet header. */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}
551
/*
 * Query (and cache in n->has_ufo) whether the backend supports UDP
 * fragmentation offload.  Requires vnet header support; returns 0
 * otherwise.
 */
static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}
561
/*
 * Recompute the guest-visible vnet header layout from the negotiated
 * features (mergeable RX buffers, VIRTIO_F_VERSION_1, hash report) and
 * propagate the resulting header length to every backend that can match
 * it, so headers need no rewriting on the fast path.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        /* Modern header; the larger v1_hash layout is only used when the
         * guest asked for per-packet hash reports. */
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        /* Legacy header size depends on mergeable RX buffers. */
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
591
592 static int virtio_net_max_tx_queue_size(VirtIONet *n)
593 {
594 NetClientState *peer = n->nic_conf.peers.ncs[0];
595
596 /*
597 * Backends other than vhost-user don't support max queue size.
598 */
599 if (!peer) {
600 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
601 }
602
603 if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
604 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
605 }
606
607 return VIRTQUEUE_MAX_SIZE;
608 }
609
/*
 * Activate queue pair @index on the backend: enable the vhost-user vring
 * if applicable, and for tap backends (when multiqueue is in use) enable
 * the corresponding tap queue.  Returns 0 on success or when nothing
 * needed doing; otherwise tap_enable()'s error.
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    /* Single-queue tap devices have nothing to enable. */
    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}
632
633 static int peer_detach(VirtIONet *n, int index)
634 {
635 NetClientState *nc = qemu_get_subqueue(n->nic, index);
636
637 if (!nc->peer) {
638 return 0;
639 }
640
641 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
642 vhost_set_vring_enable(nc->peer, 0);
643 }
644
645 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
646 return 0;
647 }
648
649 return tap_disable(nc->peer);
650 }
651
652 static void virtio_net_set_queues(VirtIONet *n)
653 {
654 int i;
655 int r;
656
657 if (n->nic->peer_deleted) {
658 return;
659 }
660
661 for (i = 0; i < n->max_queues; i++) {
662 if (i < n->curr_queues) {
663 r = peer_attach(n, i);
664 assert(!r);
665 } else {
666 r = peer_detach(n, i);
667 assert(!r);
668 }
669 }
670 }
671
672 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
673
/*
 * VirtioDeviceClass::get_features hook: start from the configured host
 * features, then strip everything the backend cannot support (offloads
 * without vnet headers, UFO, RSS/hash-report over vhost) and let the
 * vhost backend mask the rest.  The result is what is offered to the
 * guest.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        /* No vnet header means no checksum/TSO offloads either way. */
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    /* Userspace datapath: everything that survived above is offered. */
    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    /* RSS and hash report are handled in QEMU, not by vhost. */
    virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
720
/*
 * VirtioDeviceClass::bad_features hook: the feature set that very old
 * drivers assumed without negotiating, used for compatibility detection.
 */
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25. It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}
735
/*
 * Push the currently active guest offloads (n->curr_guest_offloads) down
 * to the backend as csum/tso4/tso6/ecn/ufo enable flags.
 */
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}
745
746 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
747 {
748 static const uint64_t guest_offloads_mask =
749 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
750 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
751 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
752 (1ULL << VIRTIO_NET_F_GUEST_ECN) |
753 (1ULL << VIRTIO_NET_F_GUEST_UFO);
754
755 return guest_offloads_mask & features;
756 }
757
/* Guest offload bits permitted by the features the guest negotiated. */
static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
763
/*
 * Failover: hot-add the primary (passthrough) device that was registered
 * with failover_pair_id pointing at this virtio-net instance.  No-op if it
 * is already plugged.  On failure @errp is set and the primary stays
 * absent.
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;

    if (n->primary_dev) {
        return;
    }

    n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
                                            n->primary_device_id);
    if (n->primary_device_opts) {
        n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
        if (err) {
            /* Creation failed: drop the opts so a retry can re-create them. */
            qemu_opts_del(n->primary_device_opts);
        }
        if (n->primary_dev) {
            n->primary_bus = n->primary_dev->parent_bus;
            /* NOTE(review): qdev_device_add() appears to return NULL when it
             * sets err, which would make this unplug branch unreachable --
             * confirm against qdev-monitor.c before relying on it. */
            if (err) {
                qdev_unplug(n->primary_dev, &err);
                qdev_set_id(n->primary_dev, "");

            }
        }
    } else {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=<virtio-net-id>\n");
    }
    if (err) {
        error_propagate(errp, err);
    }
}
797
798 static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
799 {
800 VirtIONet *n = opaque;
801 int ret = 0;
802
803 const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
804
805 if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
806 n->primary_device_id = g_strdup(opts->id);
807 ret = 1;
808 }
809
810 return ret;
811 }
812
/*
 * Locate the already-realized primary device paired with this NIC by
 * scanning the -device option groups for a matching failover_pair_id.
 * Returns the device, or NULL (with @errp set when the scan matched but
 * no id/device could be resolved; NULL with no error when nothing
 * matched).
 */
static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
{
    DeviceState *dev = NULL;
    Error *err = NULL;

    if (qemu_opts_foreach(qemu_find_opts("device"),
                          is_my_primary, n, &err)) {
        if (err) {
            error_propagate(errp, err);
            return NULL;
        }
        if (n->primary_device_id) {
            /* is_my_primary() recorded the id; resolve it to a device. */
            dev = qdev_find_recursive(sysbus_get_default(),
                                      n->primary_device_id);
        } else {
            error_setg(errp, "Primary device id not found");
            return NULL;
        }
    }
    return dev;
}
834
835
836
/*
 * Find this NIC's primary failover device and cache its id and opts on
 * the VirtIONet.  Returns the primary device or NULL (propagating any
 * lookup error).  Note: the @dev parameter is unused here -- presumably
 * kept for hook-signature symmetry; confirm with callers.
 */
static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
                                                    DeviceState *dev,
                                                    Error **errp)
{
    DeviceState *prim_dev = NULL;
    Error *err = NULL;

    prim_dev = virtio_net_find_primary(n, &err);
    if (prim_dev) {
        n->primary_device_id = g_strdup(prim_dev->id);
        n->primary_device_opts = prim_dev->opts;
    } else {
        if (err) {
            error_propagate(errp, err);
        }
    }

    return prim_dev;
}
856
/*
 * VirtioDeviceClass::set_features hook: act on the feature set the guest
 * acked -- configure multiqueue, header layout, RSC/RSS state and guest
 * offloads, forward the acked features to any vhost backends, reset the
 * VLAN filter, and kick off failover primary hotplug when STANDBY was
 * negotiated.  Failover problems are only warned about, never fatal.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* Receive segment coalescing needs both RSC_EXT and the matching TSO. */
    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    /* Without CTRL_VLAN all VLANs pass; with it, start fully filtered. */
    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        atomic_set(&n->primary_should_be_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            /* First attempt failed; resolve the primary and retry once. */
            n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
            if (err) {
                goto out_err;
            }
            failover_add_primary(n, &err);
            if (err) {
                goto out_err;
            }
        }
    }
    return;

out_err:
    if (err) {
        warn_report_err(err);
    }
}
929
930 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
931 struct iovec *iov, unsigned int iov_cnt)
932 {
933 uint8_t on;
934 size_t s;
935 NetClientState *nc = qemu_get_queue(n->nic);
936
937 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
938 if (s != sizeof(on)) {
939 return VIRTIO_NET_ERR;
940 }
941
942 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
943 n->promisc = on;
944 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
945 n->allmulti = on;
946 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
947 n->alluni = on;
948 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
949 n->nomulti = on;
950 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
951 n->nouni = on;
952 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
953 n->nobcast = on;
954 } else {
955 return VIRTIO_NET_ERR;
956 }
957
958 rxfilter_notify(nc);
959
960 return VIRTIO_NET_OK;
961 }
962
/*
 * Handle VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET: the guest supplies a 64-bit
 * bitmap of offloads to enable at runtime.  Validated against what was
 * negotiated; the RSC_EXT pseudo-offload is consumed here rather than
 * passed to the backend.
 */
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        /* RSC enable bits are handled in QEMU, not forwarded as offloads. */
        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
1007
/*
 * Handle VIRTIO_NET_CTRL_MAC commands:
 *  - ADDR_SET: replace the primary MAC from a 6-byte payload;
 *  - TABLE_SET: replace the MAC filter table from two back-to-back
 *    virtio_net_ctrl_mac lists (unicast first, then multicast).  A list
 *    that exceeds the remaining table space sets the corresponding
 *    overflow flag instead of being stored.
 * The new table is staged in a local buffer and committed atomically, so
 * a malformed command leaves the previous table intact.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Unicast list: 32-bit guest-endian count followed by the MACs. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    /* NOTE(review): entries is guest-controlled 32-bit; entries * ETH_ALEN
     * is computed in 32-bit arithmetic and could wrap for huge counts --
     * confirm upstream handling before depending on this bound check. */
    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    /* Multicast list follows, same layout. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    /* Commit the staged table. */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1103
1104 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1105 struct iovec *iov, unsigned int iov_cnt)
1106 {
1107 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1108 uint16_t vid;
1109 size_t s;
1110 NetClientState *nc = qemu_get_queue(n->nic);
1111
1112 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1113 vid = virtio_lduw_p(vdev, &vid);
1114 if (s != sizeof(vid)) {
1115 return VIRTIO_NET_ERR;
1116 }
1117
1118 if (vid >= MAX_VLAN)
1119 return VIRTIO_NET_ERR;
1120
1121 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1122 n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1123 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1124 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1125 else
1126 return VIRTIO_NET_ERR;
1127
1128 rxfilter_notify(nc);
1129
1130 return VIRTIO_NET_OK;
1131 }
1132
1133 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1134 struct iovec *iov, unsigned int iov_cnt)
1135 {
1136 trace_virtio_net_handle_announce(n->announce_timer.round);
1137 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1138 n->status & VIRTIO_NET_S_ANNOUNCE) {
1139 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1140 if (n->announce_timer.round) {
1141 qemu_announce_timer_step(&n->announce_timer);
1142 }
1143 return VIRTIO_NET_OK;
1144 } else {
1145 return VIRTIO_NET_ERR;
1146 }
1147 }
1148
/* Turn off RSS steering; emit the trace event only on an actual
 * enabled -> disabled transition. */
static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;
}
1156
1157 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1158 struct iovec *iov,
1159 unsigned int iov_cnt,
1160 bool do_rss)
1161 {
1162 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1163 struct virtio_net_rss_config cfg;
1164 size_t s, offset = 0, size_get;
1165 uint16_t queues, i;
1166 struct {
1167 uint16_t us;
1168 uint8_t b;
1169 } QEMU_PACKED temp;
1170 const char *err_msg = "";
1171 uint32_t err_value = 0;
1172
1173 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1174 err_msg = "RSS is not negotiated";
1175 goto error;
1176 }
1177 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1178 err_msg = "Hash report is not negotiated";
1179 goto error;
1180 }
1181 size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1182 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1183 if (s != size_get) {
1184 err_msg = "Short command buffer";
1185 err_value = (uint32_t)s;
1186 goto error;
1187 }
1188 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1189 n->rss_data.indirections_len =
1190 virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1191 n->rss_data.indirections_len++;
1192 if (!do_rss) {
1193 n->rss_data.indirections_len = 1;
1194 }
1195 if (!is_power_of_2(n->rss_data.indirections_len)) {
1196 err_msg = "Invalid size of indirection table";
1197 err_value = n->rss_data.indirections_len;
1198 goto error;
1199 }
1200 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1201 err_msg = "Too large indirection table";
1202 err_value = n->rss_data.indirections_len;
1203 goto error;
1204 }
1205 n->rss_data.default_queue = do_rss ?
1206 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1207 if (n->rss_data.default_queue >= n->max_queues) {
1208 err_msg = "Invalid default queue";
1209 err_value = n->rss_data.default_queue;
1210 goto error;
1211 }
1212 offset += size_get;
1213 size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1214 g_free(n->rss_data.indirections_table);
1215 n->rss_data.indirections_table = g_malloc(size_get);
1216 if (!n->rss_data.indirections_table) {
1217 err_msg = "Can't allocate indirections table";
1218 err_value = n->rss_data.indirections_len;
1219 goto error;
1220 }
1221 s = iov_to_buf(iov, iov_cnt, offset,
1222 n->rss_data.indirections_table, size_get);
1223 if (s != size_get) {
1224 err_msg = "Short indirection table buffer";
1225 err_value = (uint32_t)s;
1226 goto error;
1227 }
1228 for (i = 0; i < n->rss_data.indirections_len; ++i) {
1229 uint16_t val = n->rss_data.indirections_table[i];
1230 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1231 }
1232 offset += size_get;
1233 size_get = sizeof(temp);
1234 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1235 if (s != size_get) {
1236 err_msg = "Can't get queues";
1237 err_value = (uint32_t)s;
1238 goto error;
1239 }
1240 queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1241 if (queues == 0 || queues > n->max_queues) {
1242 err_msg = "Invalid number of queues";
1243 err_value = queues;
1244 goto error;
1245 }
1246 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1247 err_msg = "Invalid key size";
1248 err_value = temp.b;
1249 goto error;
1250 }
1251 if (!temp.b && n->rss_data.hash_types) {
1252 err_msg = "No key provided";
1253 err_value = 0;
1254 goto error;
1255 }
1256 if (!temp.b && !n->rss_data.hash_types) {
1257 virtio_net_disable_rss(n);
1258 return queues;
1259 }
1260 offset += size_get;
1261 size_get = temp.b;
1262 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1263 if (s != size_get) {
1264 err_msg = "Can get key buffer";
1265 err_value = (uint32_t)s;
1266 goto error;
1267 }
1268 n->rss_data.enabled = true;
1269 trace_virtio_net_rss_enable(n->rss_data.hash_types,
1270 n->rss_data.indirections_len,
1271 temp.b);
1272 return queues;
1273 error:
1274 trace_virtio_net_rss_error(err_msg, err_value);
1275 virtio_net_disable_rss(n);
1276 return 0;
1277 }
1278
/*
 * VIRTIO_NET_CTRL_MQ handler: configure multiqueue, RSS or hash
 * reporting.  Returns VIRTIO_NET_OK / VIRTIO_NET_ERR.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    /* Any MQ command invalidates the previous RSS state first. */
    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        /* Hash-report only: the queue count is not changed here. */
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* virtio_net_handle_rss() returns 0 on error, which presumably also
     * fails the MIN bound below -- confirm VQ_PAIRS_MIN >= 1. */
    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
1323
/* Control virtqueue handler: pop each command element, dispatch on the
 * command class, then write the one-byte ack status back into the
 * element's in-buffer and push it to the guest. */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        /* Each element must carry room for the status byte (in) and at
         * least a full control header (out). */
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        /* Work on a copy: iov_discard_front() below modifies the iovec,
         * but the element's own sg is still needed for virtqueue_push(). */
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        /* Report the status byte back to the guest. */
        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}
1376
1377 /* RX */
1378
/* Guest kicked an RX virtqueue: new buffers are available, so try to
 * deliver any packets the peer queued while we were out of buffers. */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}
1386
1387 static bool virtio_net_can_receive(NetClientState *nc)
1388 {
1389 VirtIONet *n = qemu_get_nic_opaque(nc);
1390 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1391 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1392
1393 if (!vdev->vm_running) {
1394 return false;
1395 }
1396
1397 if (nc->queue_index >= n->curr_queues) {
1398 return false;
1399 }
1400
1401 if (!virtio_queue_ready(q->rx_vq) ||
1402 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1403 return false;
1404 }
1405
1406 return true;
1407 }
1408
/* Return 1 if the RX virtqueue can hold a packet of @bufsize bytes,
 * else 0.  Also manages guest notifications: they are enabled while we
 * wait for buffers and disabled again once enough are available. */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
1431
/* Byte-swap the multi-byte fields of a virtio-net header between guest
 * and host endianness (virtio_tswap16s decides per-device whether a
 * swap is actually needed). */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1439
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    /* @buf points at the Ethernet frame (the caller passes the packet
     * past the vnet header); byte offsets below are frame offsets:
     * 12/13 = ethertype, 23 = 14 (eth) + 9 (IP protocol field),
     * 34/35 = 14 + 20 (UDP source port, assuming no IP options). */
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
1466
/* Write the virtio-net header for an incoming packet into the guest's
 * scatter/gather list.  When the backend supplies a vnet header
 * (n->has_vnet_hdr), the header at the front of @buf is fixed up in
 * place (dhclient workaround, optional endian swap) and copied out;
 * otherwise a blank header is synthesized. */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
1488
1489 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1490 {
1491 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1492 static const uint8_t vlan[] = {0x81, 0x00};
1493 uint8_t *ptr = (uint8_t *)buf;
1494 int i;
1495
1496 if (n->promisc)
1497 return 1;
1498
1499 ptr += n->host_hdr_len;
1500
1501 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1502 int vid = lduw_be_p(ptr + 14) & 0xfff;
1503 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1504 return 0;
1505 }
1506
1507 if (ptr[0] & 1) { // multicast
1508 if (!memcmp(ptr, bcast, sizeof(bcast))) {
1509 return !n->nobcast;
1510 } else if (n->nomulti) {
1511 return 0;
1512 } else if (n->allmulti || n->mac_table.multi_overflow) {
1513 return 1;
1514 }
1515
1516 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1517 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1518 return 1;
1519 }
1520 }
1521 } else { // unicast
1522 if (n->nouni) {
1523 return 0;
1524 } else if (n->alluni || n->mac_table.uni_overflow) {
1525 return 1;
1526 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1527 return 1;
1528 }
1529
1530 for (i = 0; i < n->mac_table.first_multi; i++) {
1531 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1532 return 1;
1533 }
1534 }
1535 }
1536
1537 return 0;
1538 }
1539
1540 static uint8_t virtio_net_get_hash_type(bool isip4,
1541 bool isip6,
1542 bool isudp,
1543 bool istcp,
1544 uint32_t types)
1545 {
1546 if (isip4) {
1547 if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1548 return NetPktRssIpV4Tcp;
1549 }
1550 if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1551 return NetPktRssIpV4Udp;
1552 }
1553 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1554 return NetPktRssIpV4;
1555 }
1556 } else if (isip6) {
1557 uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1558 VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1559
1560 if (istcp && (types & mask)) {
1561 return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1562 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1563 }
1564 mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1565 if (isudp && (types & mask)) {
1566 return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1567 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1568 }
1569 mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1570 if (types & mask) {
1571 return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1572 NetPktRssIpV6Ex : NetPktRssIpV6;
1573 }
1574 }
1575 return 0xff;
1576 }
1577
/* Store the computed RSS hash and its report type into the packet's
 * virtio_net_hdr_v1_hash header at the front of @buf.
 * NOTE(review): fields are assigned in host endianness; the virtio spec
 * defines them as little-endian -- verify behavior on big-endian hosts. */
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
1585
/*
 * Compute the RSS hash for an incoming packet and decide which RX queue
 * it belongs on.  Optionally stores the hash into the packet's header
 * (n->rss_data.populate_hash).
 *
 * Returns the target queue index, or -1 if the packet should stay on
 * the queue it arrived on (@nc).
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    /* Indexed by NetPktRss* value; yields VIRTIO_NET_HASH_REPORT_*. */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    /* IP fragments carry no usable L4 header: hash the IP layer only. */
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* No configured hash type matches this packet. */
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* indirections_len is a power of two, so this is a mask. */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
1638
/*
 * Deliver one packet from the peer into the guest's RX virtqueue.
 * Must run under the RCU read lock (see virtio_net_do_receive()).
 *
 * @no_rss suppresses RSS steering for the single recursive call made
 * after a packet has been redirected to another subqueue.
 *
 * Returns the number of bytes consumed, 0 to make the peer queue the
 * packet until buffers become available, or -1 on error.
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* RSS may steer the packet to another subqueue; recurse at most once. */
    if (!no_rss && n->rss_data.enabled) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    /* Filtered out: report the packet as consumed so it is dropped. */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /* Running out of buffers is only an error once descriptors
             * have already been filled for this packet (i != 0). */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives in the guest's first
                 * buffer; it is patched after the loop, once the final
                 * buffer count is known. */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Also copy the hash fields written into buf's header by
                 * virtio_net_process_rss() to the guest buffer. */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        /* Patch the number of merged buffers into the first header. */
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}
1755
/* Thin wrapper: hold the RCU read lock for the whole receive path and
 * enter it with RSS steering enabled (no_rss = false). */
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
1763
1764 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1765 const uint8_t *buf,
1766 VirtioNetRscUnit *unit)
1767 {
1768 uint16_t ip_hdrlen;
1769 struct ip_header *ip;
1770
1771 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1772 + sizeof(struct eth_header));
1773 unit->ip = (void *)ip;
1774 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1775 unit->ip_plen = &ip->ip_len;
1776 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1777 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1778 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1779 }
1780
1781 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1782 const uint8_t *buf,
1783 VirtioNetRscUnit *unit)
1784 {
1785 struct ip6_header *ip6;
1786
1787 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1788 + sizeof(struct eth_header));
1789 unit->ip = ip6;
1790 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1791 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1792 + sizeof(struct ip6_header));
1793 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1794
1795 /* There is a difference between payload lenght in ipv4 and v6,
1796 ip header is excluded in ipv6 */
1797 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1798 }
1799
/* Flush one cached segment to the guest: stamp the virtio header with
 * RSC info when the segment was actually coalesced, hand it to the
 * normal receive path, then unlink and free it.  Returns the receive
 * result (0 means the packet was not consumed). */
static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr_v1 *h;

    h = (struct virtio_net_hdr_v1 *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        h->rsc.segments = seg->packets;
        h->rsc.dup_acks = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}
1828
/* Drain timer callback: flush every cached segment of a chain to the
 * guest; re-arm the timer if any segment remains (drain failed). */
static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}
1847
/* Tear down all RSC state: free every cached segment, stop and free
 * each chain's drain timer, then free the chains themselves. */
static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}
1866
/*
 * Copy @buf into a newly allocated segment and append it to @chain.
 * seg->buf is sized for the largest possible coalesced packet so that
 * later merges (virtio_net_rsc_coalesce_data) can append payload in
 * place.
 */
static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    /* Cache the header/TCP pointers for this private copy of the packet. */
    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}
1899
1900 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1901 VirtioNetRscSeg *seg,
1902 const uint8_t *buf,
1903 struct tcp_header *n_tcp,
1904 struct tcp_header *o_tcp)
1905 {
1906 uint32_t nack, oack;
1907 uint16_t nwin, owin;
1908
1909 nack = htonl(n_tcp->th_ack);
1910 nwin = htons(n_tcp->th_win);
1911 oack = htonl(o_tcp->th_ack);
1912 owin = htons(o_tcp->th_win);
1913
1914 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1915 chain->stat.ack_out_of_win++;
1916 return RSC_FINAL;
1917 } else if (nack == oack) {
1918 /* duplicated ack or window probe */
1919 if (nwin == owin) {
1920 /* duplicated ack, add dup ack count due to whql test up to 1 */
1921 chain->stat.dup_ack++;
1922 return RSC_FINAL;
1923 } else {
1924 /* Coalesce window update */
1925 o_tcp->th_win = n_tcp->th_win;
1926 chain->stat.win_update++;
1927 return RSC_COALESCE;
1928 }
1929 } else {
1930 /* pure ack, go to 'C', finalize*/
1931 chain->stat.pure_ack++;
1932 return RSC_FINAL;
1933 }
1934 }
1935
/*
 * Try to merge the new packet's TCP payload into cached segment @seg.
 * Returns RSC_COALESCE if merged (or window-updated via handle_ack),
 * RSC_FINAL if the flow must be flushed first (out of window/order,
 * duplicate ack, oversize result).
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    /* Start of the new packet's TCP payload. */
    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        /* The newest ack and window always come from the new packet. */
        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        /* Append payload to the cached buffer (sized for the maximum
         * coalesced packet in virtio_net_rsc_cache_buf()). */
        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
2000
2001 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2002 VirtioNetRscSeg *seg,
2003 const uint8_t *buf, size_t size,
2004 VirtioNetRscUnit *unit)
2005 {
2006 struct ip_header *ip1, *ip2;
2007
2008 ip1 = (struct ip_header *)(unit->ip);
2009 ip2 = (struct ip_header *)(seg->unit.ip);
2010 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2011 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2012 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2013 chain->stat.no_match++;
2014 return RSC_NO_MATCH;
2015 }
2016
2017 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2018 }
2019
2020 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2021 VirtioNetRscSeg *seg,
2022 const uint8_t *buf, size_t size,
2023 VirtioNetRscUnit *unit)
2024 {
2025 struct ip6_header *ip1, *ip2;
2026
2027 ip1 = (struct ip6_header *)(unit->ip);
2028 ip2 = (struct ip6_header *)(seg->unit.ip);
2029 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2030 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2031 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2032 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2033 chain->stat.no_match++;
2034 return RSC_NO_MATCH;
2035 }
2036
2037 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2038 }
2039
2040 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2041 * to prevent out of order */
2042 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2043 struct tcp_header *tcp)
2044 {
2045 uint16_t tcp_hdr;
2046 uint16_t tcp_flag;
2047
2048 tcp_flag = htons(tcp->th_offset_flags);
2049 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2050 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2051 tcp_flag = htons(tcp->th_offset_flags) & 0x3F;
2052 if (tcp_flag & TH_SYN) {
2053 chain->stat.tcp_syn++;
2054 return RSC_BYPASS;
2055 }
2056
2057 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2058 chain->stat.tcp_ctrl_drain++;
2059 return RSC_FINAL;
2060 }
2061
2062 if (tcp_hdr > sizeof(struct tcp_header)) {
2063 chain->stat.tcp_all_opt++;
2064 return RSC_FINAL;
2065 }
2066
2067 return RSC_CANDIDATE;
2068 }
2069
/*
 * Main RSC decision point for a sanity-checked data packet: cache it if
 * the chain is empty, otherwise try to merge it into a matching cached
 * segment.  Returns the number of bytes consumed; 0 only when flushing
 * a finalized segment failed.
 */
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        /* Arm the drain timer so a stale segment cannot linger forever. */
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
2115
/* Drain a connection data, this is to avoid out of order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    /* Source+destination TCP ports compared as one 32-bit pair.
     * NOTE(review): this dereferences the buffer at an arbitrary offset
     * as uint32_t -- assumes unaligned loads are OK on this host. */
    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        /* Only drain the cached segment belonging to this exact flow. */
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    /* The triggering packet itself bypasses coalescing. */
    return virtio_net_do_receive(nc, buf, size);
}
2142
/* Decide whether an IPv4 packet is eligible for coalescing.
 * Returns RSC_CANDIDATE when eligible, RSC_BYPASS otherwise. */
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet.
     * NOTE(review): a version mismatch is counted under stat.ip_option,
     * the same counter as the option check below -- confirm intended. */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    /* Reject total lengths that disagree with the actual buffer size. */
    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2188
2189 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2190 NetClientState *nc,
2191 const uint8_t *buf, size_t size)
2192 {
2193 int32_t ret;
2194 uint16_t hdr_len;
2195 VirtioNetRscUnit unit;
2196
2197 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2198
2199 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2200 + sizeof(struct tcp_header))) {
2201 chain->stat.bypass_not_tcp++;
2202 return virtio_net_do_receive(nc, buf, size);
2203 }
2204
2205 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2206 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2207 != RSC_CANDIDATE) {
2208 return virtio_net_do_receive(nc, buf, size);
2209 }
2210
2211 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2212 if (ret == RSC_BYPASS) {
2213 return virtio_net_do_receive(nc, buf, size);
2214 } else if (ret == RSC_FINAL) {
2215 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2216 ((hdr_len + sizeof(struct eth_header)) + 12),
2217 VIRTIO_NET_IP4_ADDR_SIZE,
2218 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2219 }
2220
2221 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2222 }
2223
/*
 * Decide whether an IPv6 packet is eligible for coalescing.
 *
 * Returns RSC_CANDIDATE for plain TCP/IPv6 without extension headers or
 * ECN marking and with a sane payload length; RSC_BYPASS otherwise.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv6 packet (version nibble of the flow word) */
    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol is checked in this: any extension header
     * would make the next-header field differ from TCP */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Payload length must cover at least the TCP header and fit in the
     * bytes received after the virtio, eth and ip6 headers.
     * htons() is used as a plain byte-swap here (same as ntohs). */
    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2257
2258 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2259 const uint8_t *buf, size_t size)
2260 {
2261 int32_t ret;
2262 uint16_t hdr_len;
2263 VirtioNetRscChain *chain;
2264 VirtioNetRscUnit unit;
2265
2266 chain = (VirtioNetRscChain *)opq;
2267 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2268
2269 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2270 + sizeof(tcp_header))) {
2271 return virtio_net_do_receive(nc, buf, size);
2272 }
2273
2274 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2275 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2276 unit.ip, buf, size)) {
2277 return virtio_net_do_receive(nc, buf, size);
2278 }
2279
2280 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2281 if (ret == RSC_BYPASS) {
2282 return virtio_net_do_receive(nc, buf, size);
2283 } else if (ret == RSC_FINAL) {
2284 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2285 ((hdr_len + sizeof(struct eth_header)) + 8),
2286 VIRTIO_NET_IP6_ADDR_SIZE,
2287 hdr_len + sizeof(struct eth_header)
2288 + sizeof(struct ip6_header));
2289 }
2290
2291 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2292 }
2293
2294 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2295 NetClientState *nc,
2296 uint16_t proto)
2297 {
2298 VirtioNetRscChain *chain;
2299
2300 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2301 return NULL;
2302 }
2303
2304 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2305 if (chain->proto == proto) {
2306 return chain;
2307 }
2308 }
2309
2310 chain = g_malloc(sizeof(*chain));
2311 chain->n = n;
2312 chain->proto = proto;
2313 if (proto == (uint16_t)ETH_P_IP) {
2314 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2315 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2316 } else {
2317 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2318 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2319 }
2320 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2321 virtio_net_rsc_purge, chain);
2322 memset(&chain->stat, 0, sizeof(chain->stat));
2323
2324 QTAILQ_INIT(&chain->buffers);
2325 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2326
2327 return chain;
2328 }
2329
/*
 * RSC top-level receive: dispatch to the IPv4/IPv6 coalescing paths when
 * enabled, otherwise deliver the packet directly.
 */
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    /* NOTE(review): the minimum-size check uses host_hdr_len while the
     * ethernet header below is located via guest_hdr_len; when
     * guest_hdr_len > host_hdr_len the check may be too permissive --
     * confirm which header prefixes the buffer on this path. */
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    /* htons() used as a plain byte-swap (same as ntohs) */
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}
2358
2359 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2360 size_t size)
2361 {
2362 VirtIONet *n = qemu_get_nic_opaque(nc);
2363 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2364 return virtio_net_rsc_receive(nc, buf, size);
2365 } else {
2366 return virtio_net_do_receive(nc, buf, size);
2367 }
2368 }
2369
2370 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2371
/*
 * Completion callback for an asynchronous TX submitted by
 * virtio_net_flush_tx(): complete the parked element, re-enable queue
 * notifications and resume flushing the queue.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /* Return the element that flush_tx parked in async_tx */
    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2387
/* TX */
/*
 * Pop up to n->tx_burst elements off the TX virtqueue and transmit them.
 *
 * Returns the number of packets flushed, -EINVAL when the device has
 * been marked broken (malformed guest descriptors), or -EBUSY when the
 * backend cannot accept a packet right now (virtio_net_tx_complete()
 * will resume the flush).
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* A send is already in flight; wait for its completion callback */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /* Byte-swap the header copy in mhdr and splice it in
                 * front of the remaining guest buffers via sg2 */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    /* sg2 overflowed: drop rather than send truncated */
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Backend busy: park the element; completion restarts us */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
2483
/*
 * TX virtqueue kick handler for the timer-based TX mode: the first kick
 * arms the timer to batch packets; a kick while the timer is pending
 * flushes immediately.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        /* Remember to flush once the VM resumes */
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* Second kick before the timer fired: flush right away */
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            /* Device is broken; nothing more to do */
            return;
        }
    } else {
        /* First kick: batch by deferring the flush by tx_timeout ns */
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}
2514
/*
 * TX virtqueue kick handler for the bottom-half TX mode: schedule the
 * BH that performs the actual flush.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* BH already pending; nothing to do */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        /* tx_waiting stays set so the flush runs when restarted */
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}
2536
/*
 * Timer callback for the timer-based TX mode: re-enable notifications
 * and flush whatever accumulated during the batching window.
 */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready on more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2559
/*
 * Bottom-half callback for the BH-based TX mode: flush a burst, and
 * either reschedule (more work pending) or re-enable notifications.
 */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready on more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
2608
2609 static void virtio_net_add_queue(VirtIONet *n, int index)
2610 {
2611 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2612
2613 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2614 virtio_net_handle_rx);
2615
2616 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2617 n->vqs[index].tx_vq =
2618 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2619 virtio_net_handle_tx_timer);
2620 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2621 virtio_net_tx_timer,
2622 &n->vqs[index]);
2623 } else {
2624 n->vqs[index].tx_vq =
2625 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2626 virtio_net_handle_tx_bh);
2627 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2628 }
2629
2630 n->vqs[index].tx_waiting = 0;
2631 n->vqs[index].n = n;
2632 }
2633
/*
 * Tear down the rx/tx virtqueue pair for queue pair @index and release
 * its TX timer or bottom half.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    /* Drop packets still queued towards this subqueue */
    qemu_purge_queued_packets(nc);

    /* rx vq sits at 2 * index, tx vq at 2 * index + 1 */
    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        /* timer-based TX mode */
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        /* bottom-half-based TX mode */
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
2654
/*
 * Grow or shrink the set of rx/tx queue pairs to @new_max_queues.
 * Total virtqueue count is 2 * pairs + 1 (the trailing ctrl vq), so the
 * ctrl vq must be removed first and re-added last to keep it at the
 * highest index.
 */
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    /* At least one rx/tx pair plus the ctrl vq, always an odd count */
    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
2689
2690 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2691 {
2692 int max = multiqueue ? n->max_queues : 1;
2693
2694 n->multiqueue = multiqueue;
2695 virtio_net_change_num_queues(n, max);
2696
2697 virtio_net_set_queues(n);
2698 }
2699
/*
 * post_load hook for the device vmstate: re-derive runtime state that is
 * not migrated directly (header layout, queue config, link state,
 * pending announce) from the loaded fields.
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    /* Without the ctrl-offloads feature the guest cannot have changed
     * the offloads, so reset them to the supported set */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /* Resume or cancel a guest announce sequence that was in flight */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
2769
/*
 * Runs after virtio_load() has restored the feature bits (which
 * clobbers curr_guest_offloads via virtio_set_features_nocheck):
 * restore the value saved in virtio_net_post_load_device() and apply it.
 */
static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}
2785
/* tx_waiting field of a VirtIONetQueue: the only per-queue state that
 * needs migrating.  Wire format must stay unchanged. */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
2794
2795 static bool max_queues_gt_1(void *opaque, int version_id)
2796 {
2797 return VIRTIO_NET(opaque)->max_queues > 1;
2798 }
2799
2800 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2801 {
2802 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2803 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2804 }
2805
2806 static bool mac_table_fits(void *opaque, int version_id)
2807 {
2808 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2809 }
2810
/* Inverse of mac_table_fits(): guards the discard half of the guarded
 * pair in vmstate_virtio_net_device. */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}
2815
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;         /* device being migrated */
    VirtIONetQueue *vqs_1;          /* &vqs[1]: varray base for tx_waiting */
    uint16_t        curr_queues_1;  /* curr_queues - 1: varray length */
    uint8_t         has_ufo;        /* used by the "virtio-net-ufo" tmp */
    uint32_t        has_vnet_hdr;   /* used by the "virtio-net-vnet" tmp */
};
2826
2827 /* The 2nd and subsequent tx_waiting flags are loaded later than
2828 * the 1st entry in the queues and only if there's more than one
2829 * entry. We use the tmp mechanism to calculate a temporary
2830 * pointer and count and also validate the count.
2831 */
2832
2833 static int virtio_net_tx_waiting_pre_save(void *opaque)
2834 {
2835 struct VirtIONetMigTmp *tmp = opaque;
2836
2837 tmp->vqs_1 = tmp->parent->vqs + 1;
2838 tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2839 if (tmp->parent->curr_queues == 0) {
2840 tmp->curr_queues_1 = 0;
2841 }
2842
2843 return 0;
2844 }
2845
2846 static int virtio_net_tx_waiting_pre_load(void *opaque)
2847 {
2848 struct VirtIONetMigTmp *tmp = opaque;
2849
2850 /* Reuse the pointer setup from save */
2851 virtio_net_tx_waiting_pre_save(opaque);
2852
2853 if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2854 error_report("virtio-net: curr_queues %x > max_queues %x",
2855 tmp->parent->curr_queues, tmp->parent->max_queues);
2856
2857 return -EINVAL;
2858 }
2859
2860 return 0; /* all good */
2861 }
2862
/* WITH_TMP wrapper migrating the tx_waiting flags of queues 1..N-1
 * (queue 0 travels with the main device state). */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name  = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queues_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
2875
2876 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2877 * flag set we need to check that we have it
2878 */
2879 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2880 {
2881 struct VirtIONetMigTmp *tmp = opaque;
2882
2883 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2884 error_report("virtio-net: saved image requires TUN_F_UFO support");
2885 return -EINVAL;
2886 }
2887
2888 return 0;
2889 }
2890
2891 static int virtio_net_ufo_pre_save(void *opaque)
2892 {
2893 struct VirtIONetMigTmp *tmp = opaque;
2894
2895 tmp->has_ufo = tmp->parent->has_ufo;
2896
2897 return 0;
2898 }
2899
/* WITH_TMP wrapper: migrate the has_ufo capability flag so the
 * destination can refuse a stream it cannot support. */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name  = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2909
2910 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2911 * flag set we need to check that we have it
2912 */
2913 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2914 {
2915 struct VirtIONetMigTmp *tmp = opaque;
2916
2917 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2918 error_report("virtio-net: saved image requires vnet_hdr=on");
2919 return -EINVAL;
2920 }
2921
2922 return 0;
2923 }
2924
2925 static int virtio_net_vnet_pre_save(void *opaque)
2926 {
2927 struct VirtIONetMigTmp *tmp = opaque;
2928
2929 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2930
2931 return 0;
2932 }
2933
/* WITH_TMP wrapper: migrate the has_vnet_hdr capability flag so the
 * destination can refuse a stream it cannot support. */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name  = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2943
2944 static bool virtio_net_rss_needed(void *opaque)
2945 {
2946 return VIRTIO_NET(opaque)->rss_data.enabled;
2947 }
2948
/* Optional subsection carrying the RSS configuration (key, hash types,
 * indirection table).  Wire format must stay unchanged. */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
2969
/* Main device vmstate.  Field order is the wire format -- do not
 * reorder or remove entries. */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* Queue 0's tx_waiting; queues 1..N-1 come later via the
         * virtio-net-tx_waiting WITH_TMP below */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* max_queues must match on both sides (uint16_equal) */
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3025
/* Callbacks registered with the net core for the virtio NIC client */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3035
3036 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3037 {
3038 VirtIONet *n = VIRTIO_NET(vdev);
3039 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3040 assert(n->vhost_started);
3041 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3042 }
3043
3044 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3045 bool mask)
3046 {
3047 VirtIONet *n = VIRTIO_NET(vdev);
3048 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3049 assert(n->vhost_started);
3050 vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3051 vdev, idx, mask);
3052 }
3053
/*
 * Compute the device config space size from the host features.
 * @host_features is passed by value: VIRTIO_NET_F_MAC is forced on in
 * the local copy only, since the config space always contains the MAC.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}
3061
3062 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3063 const char *type)
3064 {
3065 /*
3066 * The name can be NULL, the netclient name will be type.x.
3067 */
3068 assert(type != NULL);
3069
3070 g_free(n->netclient_name);
3071 g_free(n->netclient_type);
3072 n->netclient_name = g_strdup(name);
3073 n->netclient_type = g_strdup(type);
3074 }
3075
3076 static bool failover_unplug_primary(VirtIONet *n)
3077 {
3078 HotplugHandler *hotplug_ctrl;
3079 PCIDevice *pci_dev;
3080 Error *err = NULL;
3081
3082 hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3083 if (hotplug_ctrl) {
3084 pci_dev = PCI_DEVICE(n->primary_dev);
3085 pci_dev->partially_hotplugged = true;
3086 hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
3087 if (err) {
3088 error_report_err(err);
3089 return false;
3090 }
3091 } else {
3092 return false;
3093 }
3094 return true;
3095 }
3096
/*
 * Re-plug the failover primary device after a failed migration (the
 * inverse of failover_unplug_primary()).
 *
 * Returns true on success or when there is nothing to do (device was
 * not partially hotplugged); false on error with *errp set.
 */
static bool failover_replug_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(n->primary_dev);

    if (!pdev->partially_hotplugged) {
        return true;
    }
    if (!n->primary_device_opts) {
        /* Recreate the -device option group from the saved dict */
        n->primary_device_opts = qemu_opts_from_qdict(
            qemu_find_opts("device"),
            n->primary_device_dict, errp);
        if (!n->primary_device_opts) {
            return false;
        }
    }
    n->primary_bus = n->primary_dev->parent_bus;
    if (!n->primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(n->primary_dev, n->primary_bus);
    n->primary_should_be_hidden = false;
    qemu_opt_set_bool(n->primary_device_opts,
                      "partially_hotplugged", true, &err);
    if (err) {
        goto out;
    }
    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err);
    }

out:
    /* Success iff no local error occurred above */
    error_propagate(errp, err);
    return !err;
}
3139
/*
 * React to a migration state change: unplug the failover primary when
 * migration starts, re-plug it when migration fails.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n,
                                                MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;

    should_be_hidden = atomic_read(&n->primary_should_be_hidden);

    /* Lazily locate the primary device on first use */
    if (!n->primary_dev) {
        n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
        if (!n->primary_dev) {
            return;
        }
    }

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n)) {
            /* Keep the primary's state out of the migration stream */
            vmstate_unregister(VMSTATE_IF(n->primary_dev),
                               qdev_get_vmsd(n->primary_dev),
                               n->primary_dev);
            qapi_event_send_unplug_primary(n->primary_device_id);
            atomic_set(&n->primary_should_be_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device let's plug it back */
        if (!failover_replug_primary(n, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}
3174
3175 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3176 {
3177 MigrationState *s = data;
3178 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3179 virtio_net_handle_migration_primary(n, s);
3180 }
3181
3182 static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
3183 QemuOpts *device_opts)
3184 {
3185 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3186 bool match_found = false;
3187 bool hide = false;
3188
3189 if (!device_opts) {
3190 return -1;
3191 }
3192 n->primary_device_dict = qemu_opts_to_qdict(device_opts,
3193 n->primary_device_dict);
3194 if (n->primary_device_dict) {
3195 g_free(n->standby_id);
3196 n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
3197 "failover_pair_id"));
3198 }
3199 if (g_strcmp0(n->standby_id, n->netclient_name) == 0) {
3200 match_found = true;
3201 } else {
3202 match_found = false;
3203 hide = false;
3204 g_free(n->standby_id);
3205 n->primary_device_dict = NULL;
3206 goto out;
3207 }
3208
3209 n->primary_device_opts = device_opts;
3210
3211 /* primary_should_be_hidden is set during feature negotiation */
3212 hide = atomic_read(&n->primary_should_be_hidden);
3213
3214 if (n->primary_device_dict) {
3215 g_free(n->primary_device_id);
3216 n->primary_device_id = g_strdup(qdict_get_try_str(
3217 n->primary_device_dict, "id"));
3218 if (!n->primary_device_id) {
3219 warn_report("primary_device_id not set");
3220 }
3221 }
3222
3223 out:
3224 if (match_found && hide) {
3225 return 1;
3226 } else if (match_found && !hide) {
3227 return 0;
3228 } else {
3229 return -1;
3230 }
3231 }
3232
3233 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3234 {
3235 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3236 VirtIONet *n = VIRTIO_NET(dev);
3237 NetClientState *nc;
3238 int i;
3239
3240 if (n->net_conf.mtu) {
3241 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3242 }
3243
3244 if (n->net_conf.duplex_str) {
3245 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3246 n->net_conf.duplex = DUPLEX_HALF;
3247 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3248 n->net_conf.duplex = DUPLEX_FULL;
3249 } else {
3250 error_setg(errp, "'duplex' must be 'half' or 'full'");
3251 return;
3252 }
3253 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3254 } else {
3255 n->net_conf.duplex = DUPLEX_UNKNOWN;
3256 }
3257