vhost-vdpa: introduce vhost-vdpa backend
[qemu.git] / hw / net / virtio-net.c
1 /*
2 * Virtio Network Device
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 #include "net_rx_pkt.h"
46 #include "hw/virtio/vhost.h"
47
/* vmstate section version for virtio-net migration. */
#define VIRTIO_NET_VM_VERSION    11

/* Capacity of the RX unicast+multicast MAC filter table. */
#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

/* TCP flag bits / data-offset mask used by the RSC (receive coalescing)
 * code further down in this file. */
#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval, This value affects the performance
   a lot, and should be tuned carefully, '300000'(300us) is the recommended
   value to pass the WHQL test, '50000' can gain 2x netperf throughput with
   tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* All hash types this device can report/steer on when VIRTIO_NET_F_RSS or
 * VIRTIO_NET_F_HASH_REPORT is negotiated. */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
91
/*
 * Map feature bits to how much of struct virtio_net_config they make
 * visible; the largest .end among negotiated features determines the
 * usable config size (consumed via virtio_feature_get_config_size).
 * Entries must therefore be ordered by increasing .end.
 */
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};
107
108 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
109 {
110 VirtIONet *n = qemu_get_nic_opaque(nc);
111
112 return &n->vqs[nc->queue_index];
113 }
114
/* Map a virtqueue index to its queue-pair index (each pair is RX+TX). */
static int vq2q(int queue_index)
{
    int pair = queue_index / 2;

    return pair;
}
119
120 /* TODO
121 * - we could suppress RX interrupt if we were so inclined.
122 */
123
124 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
125 {
126 VirtIONet *n = VIRTIO_NET(vdev);
127 struct virtio_net_config netcfg;
128
129 int ret = 0;
130 memset(&netcfg, 0 , sizeof(struct virtio_net_config));
131 virtio_stw_p(vdev, &netcfg.status, n->status);
132 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
133 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
134 memcpy(netcfg.mac, n->mac, ETH_ALEN);
135 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
136 netcfg.duplex = n->net_conf.duplex;
137 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
138 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
139 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
140 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
141 virtio_stl_p(vdev, &netcfg.supported_hash_types,
142 VIRTIO_NET_RSS_SUPPORTED_HASHES);
143 memcpy(config, &netcfg, n->config_size);
144
145 NetClientState *nc = qemu_get_queue(n->nic);
146 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
147 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
148 n->config_size);
149 if (ret != -1) {
150 memcpy(config, &netcfg, n->config_size);
151 }
152 }
153 }
154
155 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
156 {
157 VirtIONet *n = VIRTIO_NET(vdev);
158 struct virtio_net_config netcfg = {};
159
160 memcpy(&netcfg, config, n->config_size);
161
162 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
163 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
164 memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
165 memcpy(n->mac, netcfg.mac, ETH_ALEN);
166 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
167 }
168
169 NetClientState *nc = qemu_get_queue(n->nic);
170 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
171 vhost_net_set_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
172 0, n->config_size,
173 VHOST_SET_CONFIG_TYPE_MASTER);
174 }
175 }
176
177 static bool virtio_net_started(VirtIONet *n, uint8_t status)
178 {
179 VirtIODevice *vdev = VIRTIO_DEVICE(n);
180 return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
181 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
182 }
183
184 static void virtio_net_announce_notify(VirtIONet *net)
185 {
186 VirtIODevice *vdev = VIRTIO_DEVICE(net);
187 trace_virtio_net_announce_notify();
188
189 net->status |= VIRTIO_NET_S_ANNOUNCE;
190 virtio_notify_config(vdev);
191 }
192
193 static void virtio_net_announce_timer(void *opaque)
194 {
195 VirtIONet *n = opaque;
196 trace_virtio_net_announce_timer(n->announce_timer.round);
197
198 n->announce_timer.round--;
199 virtio_net_announce_notify(n);
200 }
201
202 static void virtio_net_announce(NetClientState *nc)
203 {
204 VirtIONet *n = qemu_get_nic_opaque(nc);
205 VirtIODevice *vdev = VIRTIO_DEVICE(n);
206
207 /*
208 * Make sure the virtio migration announcement timer isn't running
209 * If it is, let it trigger announcement so that we do not cause
210 * confusion.
211 */
212 if (n->announce_timer.round) {
213 return;
214 }
215
216 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
217 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
218 virtio_net_announce_notify(n);
219 }
220 }
221
/*
 * Bring the vhost backend in or out of service so that it matches the
 * composite device state (status byte + link + VM running).  No-op when
 * the peer has no vhost backend or when it is already in the desired
 * state.  Falls back to userspace virtio (by returning without setting
 * vhost_started) on any start failure.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    /* Desired state equals current state: nothing to do. */
    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        /* vhost can't swap vnet header endianness in software. */
        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        /* Propagate a negotiated MTU to the backend before starting. */
        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        /* Set vhost_started *before* starting: callbacks triggered during
         * vhost_net_start() may consult it. */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}
279
280 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
281 NetClientState *peer,
282 bool enable)
283 {
284 if (virtio_is_big_endian(vdev)) {
285 return qemu_set_vnet_be(peer, enable);
286 } else {
287 return qemu_set_vnet_le(peer, enable);
288 }
289 }
290
/*
 * Set the vnet header endianness on the peers of all @queues clients.
 *
 * Returns true if the backends could NOT be switched (caller must then
 * swap headers in software), false on success.  When enabling fails
 * part-way, peers already switched are rolled back to the default.
 */
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            /* Undo the peers switched before the failure. */
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}
309
310 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
311 {
312 VirtIODevice *vdev = VIRTIO_DEVICE(n);
313 int queues = n->multiqueue ? n->max_queues : 1;
314
315 if (virtio_net_started(n, status)) {
316 /* Before using the device, we tell the network backend about the
317 * endianness to use when parsing vnet headers. If the backend
318 * can't do it, we fallback onto fixing the headers in the core
319 * virtio-net code.
320 */
321 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
322 queues, true);
323 } else if (virtio_net_started(n, vdev->status)) {
324 /* After using the device, we need to reset the network backend to
325 * the default (guest native endianness), otherwise the guest may
326 * lose network connectivity if it is rebooted into a different
327 * endianness.
328 */
329 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
330 }
331 }
332
333 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
334 {
335 unsigned int dropped = virtqueue_drop_all(vq);
336 if (dropped) {
337 virtio_notify(vdev, vq);
338 }
339 }
340
/*
 * VIRTIO status-byte hook: propagate the new status to vnet-header
 * endianness handling, the vhost backend, and each queue's userspace
 * TX machinery (timer or bottom half).
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        /* Queues beyond curr_queues, or any queue but 0 without
         * multiqueue, are treated as stopped. */
        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        /* The userspace path only drives a queue that vhost doesn't. */
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            /* Re-arm whichever TX flush mechanism this device uses. */
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
397
398 static void virtio_net_set_link_status(NetClientState *nc)
399 {
400 VirtIONet *n = qemu_get_nic_opaque(nc);
401 VirtIODevice *vdev = VIRTIO_DEVICE(n);
402 uint16_t old_status = n->status;
403
404 if (nc->link_down)
405 n->status &= ~VIRTIO_NET_S_LINK_UP;
406 else
407 n->status |= VIRTIO_NET_S_LINK_UP;
408
409 if (n->status != old_status)
410 virtio_notify_config(vdev);
411
412 virtio_net_set_status(vdev, vdev->status);
413 }
414
415 static void rxfilter_notify(NetClientState *nc)
416 {
417 VirtIONet *n = qemu_get_nic_opaque(nc);
418
419 if (nc->rxfilter_notify_enabled) {
420 char *path = object_get_canonical_path(OBJECT(n->qdev));
421 qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
422 n->netclient_name, path);
423 g_free(path);
424
425 /* disable event notification to avoid events flooding */
426 nc->rxfilter_notify_enabled = 0;
427 }
428 }
429
430 static intList *get_vlan_table(VirtIONet *n)
431 {
432 intList *list, *entry;
433 int i, j;
434
435 list = NULL;
436 for (i = 0; i < MAX_VLAN >> 5; i++) {
437 for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
438 if (n->vlans[i] & (1U << j)) {
439 entry = g_malloc0(sizeof(*entry));
440 entry->value = (i << 5) + j;
441 entry->next = list;
442 list = entry;
443 }
444 }
445 }
446
447 return list;
448 }
449
/*
 * QMP query-rx-filter hook: snapshot the device's RX filtering state
 * (promisc/uni/multi modes, MAC tables, VLAN table) into a freshly
 * allocated RxFilterInfo.  Re-enables the rx-filter-changed event as a
 * side effect, since the management layer has now caught up.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /* NOTE(review): reports the nobcast flag directly although the field
     * name suggests the inverse — long-standing behavior; confirm against
     * QAPI consumers before changing. */
    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* Unicast MACs occupy table slots [0, first_multi). */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    /* Multicast MACs occupy table slots [first_multi, in_use). */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
516
/*
 * Device reset hook: return all RX-filter, announce, MAC/VLAN-table and
 * multiqueue state to its power-on defaults and purge any in-flight TX.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    /* Stop any in-progress self-announce sequence. */
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    /* Restore the MAC configured on the command line. */
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}
555
556 static void peer_test_vnet_hdr(VirtIONet *n)
557 {
558 NetClientState *nc = qemu_get_queue(n->nic);
559 if (!nc->peer) {
560 return;
561 }
562
563 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
564 }
565
/* Cached result of peer_test_vnet_hdr(): non-zero when the peer
 * backend supports vnet headers. */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}
570
571 static int peer_has_ufo(VirtIONet *n)
572 {
573 if (!peer_has_vnet_hdr(n))
574 return 0;
575
576 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
577
578 return n->has_ufo;
579 }
580
/*
 * Recompute guest/host vnet header lengths from the negotiated feature
 * set and push the guest header length down to every peer that can
 * honor it.
 *
 * virtio-1 always uses the mergeable-rxbuf-sized header (or the larger
 * hash-report header); legacy devices pick the header size from the
 * MRG_RXBUF feature alone.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        /* When the backend accepts our header length, host and guest
         * headers match and no per-packet adjustment is needed. */
        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
610
611 static int virtio_net_max_tx_queue_size(VirtIONet *n)
612 {
613 NetClientState *peer = n->nic_conf.peers.ncs[0];
614
615 /*
616 * Backends other than vhost-user don't support max queue size.
617 */
618 if (!peer) {
619 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
620 }
621
622 if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
623 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
624 }
625
626 return VIRTQUEUE_MAX_SIZE;
627 }
628
629 static int peer_attach(VirtIONet *n, int index)
630 {
631 NetClientState *nc = qemu_get_subqueue(n->nic, index);
632
633 if (!nc->peer) {
634 return 0;
635 }
636
637 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
638 vhost_set_vring_enable(nc->peer, 1);
639 }
640
641 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
642 return 0;
643 }
644
645 if (n->max_queues == 1) {
646 return 0;
647 }
648
649 return tap_enable(nc->peer);
650 }
651
652 static int peer_detach(VirtIONet *n, int index)
653 {
654 NetClientState *nc = qemu_get_subqueue(n->nic, index);
655
656 if (!nc->peer) {
657 return 0;
658 }
659
660 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
661 vhost_set_vring_enable(nc->peer, 0);
662 }
663
664 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
665 return 0;
666 }
667
668 return tap_disable(nc->peer);
669 }
670
671 static void virtio_net_set_queues(VirtIONet *n)
672 {
673 int i;
674 int r;
675
676 if (n->nic->peer_deleted) {
677 return;
678 }
679
680 for (i = 0; i < n->max_queues; i++) {
681 if (i < n->curr_queues) {
682 r = peer_attach(n, i);
683 assert(!r);
684 } else {
685 r = peer_detach(n, i);
686 assert(!r);
687 }
688 }
689 }
690
691 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
692
/*
 * VIRTIO get-features hook: start from the device's host features, then
 * strip what the peer backend cannot support (offloads without vnet
 * header support, UFO, RSS/hash-report for vhost) and let the vhost
 * backend mask the rest.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    /* Offload features all require a vnet header on the backend path. */
    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    /* Without a vhost backend, everything else is offered as-is. */
    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    /* RSS and hash reporting are done in QEMU, not in the vhost path. */
    virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    /* Optionally offer MTU even when the vhost backend masked it out. */
    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
739
740 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
741 {
742 uint64_t features = 0;
743
744 /* Linux kernel 2.6.25. It understood MAC (as everyone must),
745 * but also these: */
746 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
747 virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
748 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
749 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
750 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
751
752 return features;
753 }
754
755 static void virtio_net_apply_guest_offloads(VirtIONet *n)
756 {
757 qemu_set_offload(qemu_get_queue(n->nic)->peer,
758 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
759 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
760 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
761 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
762 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
763 }
764
765 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
766 {
767 static const uint64_t guest_offloads_mask =
768 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
769 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
770 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
771 (1ULL << VIRTIO_NET_F_GUEST_ECN) |
772 (1ULL << VIRTIO_NET_F_GUEST_UFO);
773
774 return guest_offloads_mask & features;
775 }
776
777 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
778 {
779 VirtIODevice *vdev = VIRTIO_DEVICE(n);
780 return virtio_net_guest_offloads_by_features(vdev->guest_features);
781 }
782
/*
 * (Failover) Hot-add the primary (passthrough) device paired with this
 * virtio-net instance, looked up by the id recorded in
 * n->primary_device_id.  No-op when the primary already exists; sets
 * @errp when the device options cannot be found or creation fails.
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;

    if (n->primary_dev) {
        return;
    }

    n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
            n->primary_device_id);
    if (n->primary_device_opts) {
        n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
        if (err) {
            qemu_opts_del(n->primary_device_opts);
        }
        if (n->primary_dev) {
            n->primary_bus = n->primary_dev->parent_bus;
            /* NOTE(review): reaching here with err set looks unlikely
             * (qdev_device_add failed => primary_dev NULL); the unplug
             * below appears to be a belt-and-braces cleanup — confirm. */
            if (err) {
                qdev_unplug(n->primary_dev, &err);
                qdev_set_id(n->primary_dev, "");

            }
        }
    } else {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
            "sure primary device has parameter"
            " failover_pair_id=<virtio-net-id>\n");
    }
    if (err) {
        error_propagate(errp, err);
    }
}
816
817 static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
818 {
819 VirtIONet *n = opaque;
820 int ret = 0;
821
822 const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
823
824 if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
825 n->primary_device_id = g_strdup(opts->id);
826 ret = 1;
827 }
828
829 return ret;
830 }
831
/*
 * (Failover) Scan the -device options for the primary device paired
 * with this virtio-net instance (matched on failover_pair_id) and
 * resolve it to a realized DeviceState.  Returns NULL (with @errp set
 * on error) when no such device is present.
 */
static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
{
    DeviceState *dev = NULL;
    Error *err = NULL;

    if (qemu_opts_foreach(qemu_find_opts("device"),
                         is_my_primary, n, &err)) {
        if (err) {
            error_propagate(errp, err);
            return NULL;
        }
        if (n->primary_device_id) {
            dev = qdev_find_recursive(sysbus_get_default(),
                    n->primary_device_id);
        } else {
            error_setg(errp, "Primary device id not found");
            return NULL;
        }
    }
    return dev;
}
853
854
855
856 static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
857 DeviceState *dev,
858 Error **errp)
859 {
860 DeviceState *prim_dev = NULL;
861 Error *err = NULL;
862
863 prim_dev = virtio_net_find_primary(n, &err);
864 if (prim_dev) {
865 n->primary_device_id = g_strdup(prim_dev->id);
866 n->primary_device_opts = prim_dev->opts;
867 } else {
868 if (err) {
869 error_propagate(errp, err);
870 }
871 }
872
873 return prim_dev;
874 }
875
/*
 * VIRTIO set-features hook: reconfigure multiqueue, header layout, RSC,
 * RSS, guest offloads and the VLAN filter from the negotiated feature
 * set; ack the features to any vhost backends; and, when STANDBY is
 * negotiated, kick off failover primary-device plug-in.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    /* Drop the MTU feature if the backend masked it out, unless the
     * user asked to bypass the backend for MTU. */
    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    /* RSS implies multiqueue even without VIRTIO_NET_F_MQ. */
    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* Receive-segment coalescing per IP version. */
    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    /* Let each queue's vhost backend ack the negotiated features. */
    for (i = 0;  i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    /* Without CTRL_VLAN the filter must pass everything (all bits set);
     * with it, start from an empty (block-all) table. */
    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        atomic_set(&n->primary_should_be_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            /* First attempt failed: try to discover the primary device
             * from the -device options, then retry once. */
            n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
            if (err) {
                goto out_err;
            }
            failover_add_primary(n, &err);
            if (err) {
                goto out_err;
            }
        }
    }
    return;

out_err:
    if (err) {
        warn_report_err(err);
    }
}
948
949 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
950 struct iovec *iov, unsigned int iov_cnt)
951 {
952 uint8_t on;
953 size_t s;
954 NetClientState *nc = qemu_get_queue(n->nic);
955
956 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
957 if (s != sizeof(on)) {
958 return VIRTIO_NET_ERR;
959 }
960
961 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
962 n->promisc = on;
963 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
964 n->allmulti = on;
965 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
966 n->alluni = on;
967 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
968 n->nomulti = on;
969 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
970 n->nouni = on;
971 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
972 n->nobcast = on;
973 } else {
974 return VIRTIO_NET_ERR;
975 }
976
977 rxfilter_notify(nc);
978
979 return VIRTIO_NET_OK;
980 }
981
/*
 * Control-VQ VIRTIO_NET_CTRL_GUEST_OFFLOADS handler: dynamically set
 * the active guest offloads from a 64-bit little-endian mask.  The RSC
 * bit is consumed here (it only gates the device-internal coalescing)
 * and must not reach the offload validity check below.
 */
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        /* RSC applies per IP version, keyed off the matching TSO bit. */
        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        /* Strip RSC before validating against the supported offload set. */
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
1026
/*
 * Control-VQ VIRTIO_NET_CTRL_MAC handler.
 *
 * ADDR_SET replaces the main MAC from a 6-byte payload.  TABLE_SET
 * parses two back-to-back virtio_net_ctrl_mac structures (unicast list
 * then multicast list); lists that exceed the 64-entry table merely set
 * the corresponding overflow flag rather than failing.  The live table
 * is only replaced once the whole request has parsed successfully.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    /* Build the new table in a scratch buffer; commit only on success. */
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* First list: unicast entries. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    /* Second list: multicast entries, appended after the unicast ones. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast list must consume the remainder exactly. */
    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    /* Commit the fully parsed table to the device state. */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
1122
1123 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1124 struct iovec *iov, unsigned int iov_cnt)
1125 {
1126 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1127 uint16_t vid;
1128 size_t s;
1129 NetClientState *nc = qemu_get_queue(n->nic);
1130
1131 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1132 vid = virtio_lduw_p(vdev, &vid);
1133 if (s != sizeof(vid)) {
1134 return VIRTIO_NET_ERR;
1135 }
1136
1137 if (vid >= MAX_VLAN)
1138 return VIRTIO_NET_ERR;
1139
1140 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1141 n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1142 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1143 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1144 else
1145 return VIRTIO_NET_ERR;
1146
1147 rxfilter_notify(nc);
1148
1149 return VIRTIO_NET_OK;
1150 }
1151
1152 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1153 struct iovec *iov, unsigned int iov_cnt)
1154 {
1155 trace_virtio_net_handle_announce(n->announce_timer.round);
1156 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1157 n->status & VIRTIO_NET_S_ANNOUNCE) {
1158 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1159 if (n->announce_timer.round) {
1160 qemu_announce_timer_step(&n->announce_timer);
1161 }
1162 return VIRTIO_NET_OK;
1163 } else {
1164 return VIRTIO_NET_ERR;
1165 }
1166 }
1167
1168 static void virtio_net_disable_rss(VirtIONet *n)
1169 {
1170 if (n->rss_data.enabled) {
1171 trace_virtio_net_rss_disable();
1172 }
1173 n->rss_data.enabled = false;
1174 }
1175
1176 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1177 struct iovec *iov,
1178 unsigned int iov_cnt,
1179 bool do_rss)
1180 {
1181 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1182 struct virtio_net_rss_config cfg;
1183 size_t s, offset = 0, size_get;
1184 uint16_t queues, i;
1185 struct {
1186 uint16_t us;
1187 uint8_t b;
1188 } QEMU_PACKED temp;
1189 const char *err_msg = "";
1190 uint32_t err_value = 0;
1191
1192 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1193 err_msg = "RSS is not negotiated";
1194 goto error;
1195 }
1196 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1197 err_msg = "Hash report is not negotiated";
1198 goto error;
1199 }
1200 size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1201 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1202 if (s != size_get) {
1203 err_msg = "Short command buffer";
1204 err_value = (uint32_t)s;
1205 goto error;
1206 }
1207 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1208 n->rss_data.indirections_len =
1209 virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1210 n->rss_data.indirections_len++;
1211 if (!do_rss) {
1212 n->rss_data.indirections_len = 1;
1213 }
1214 if (!is_power_of_2(n->rss_data.indirections_len)) {
1215 err_msg = "Invalid size of indirection table";
1216 err_value = n->rss_data.indirections_len;
1217 goto error;
1218 }
1219 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1220 err_msg = "Too large indirection table";
1221 err_value = n->rss_data.indirections_len;
1222 goto error;
1223 }
1224 n->rss_data.default_queue = do_rss ?
1225 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1226 if (n->rss_data.default_queue >= n->max_queues) {
1227 err_msg = "Invalid default queue";
1228 err_value = n->rss_data.default_queue;
1229 goto error;
1230 }
1231 offset += size_get;
1232 size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1233 g_free(n->rss_data.indirections_table);
1234 n->rss_data.indirections_table = g_malloc(size_get);
1235 if (!n->rss_data.indirections_table) {
1236 err_msg = "Can't allocate indirections table";
1237 err_value = n->rss_data.indirections_len;
1238 goto error;
1239 }
1240 s = iov_to_buf(iov, iov_cnt, offset,
1241 n->rss_data.indirections_table, size_get);
1242 if (s != size_get) {
1243 err_msg = "Short indirection table buffer";
1244 err_value = (uint32_t)s;
1245 goto error;
1246 }
1247 for (i = 0; i < n->rss_data.indirections_len; ++i) {
1248 uint16_t val = n->rss_data.indirections_table[i];
1249 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1250 }
1251 offset += size_get;
1252 size_get = sizeof(temp);
1253 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1254 if (s != size_get) {
1255 err_msg = "Can't get queues";
1256 err_value = (uint32_t)s;
1257 goto error;
1258 }
1259 queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1260 if (queues == 0 || queues > n->max_queues) {
1261 err_msg = "Invalid number of queues";
1262 err_value = queues;
1263 goto error;
1264 }
1265 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1266 err_msg = "Invalid key size";
1267 err_value = temp.b;
1268 goto error;
1269 }
1270 if (!temp.b && n->rss_data.hash_types) {
1271 err_msg = "No key provided";
1272 err_value = 0;
1273 goto error;
1274 }
1275 if (!temp.b && !n->rss_data.hash_types) {
1276 virtio_net_disable_rss(n);
1277 return queues;
1278 }
1279 offset += size_get;
1280 size_get = temp.b;
1281 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1282 if (s != size_get) {
1283 err_msg = "Can get key buffer";
1284 err_value = (uint32_t)s;
1285 goto error;
1286 }
1287 n->rss_data.enabled = true;
1288 trace_virtio_net_rss_enable(n->rss_data.hash_types,
1289 n->rss_data.indirections_len,
1290 temp.b);
1291 return queues;
1292 error:
1293 trace_virtio_net_rss_error(err_msg, err_value);
1294 virtio_net_disable_rss(n);
1295 return 0;
1296 }
1297
/*
 * VIRTIO_NET_CTRL_MQ handler: configure multiqueue, RSS, or hash
 * reporting depending on the sub-command.  Returns VIRTIO_NET_OK or
 * VIRTIO_NET_ERR for the control-virtqueue ack.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    /* Any MQ command invalidates the previous RSS configuration. */
    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        /* Hash report only: the queue count itself is left unchanged. */
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    /* Reject counts outside the spec range or the device's capability. */
    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
1342
/*
 * Control virtqueue handler: pop each request, dispatch on the class
 * byte of its virtio_net_ctrl_hdr, and write a one-byte ack back into
 * the guest's in-buffer.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break; /* ring drained */
        }
        /* Need room for the request header and for the status byte. */
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        /* Work on a copy of the sg list: iov_discard_front() mutates it,
         * and the original must stay intact for virtqueue_push(). */
        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        /* Ack the command in the guest-supplied status buffer. */
        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}
1395
1396 /* RX */
1397
1398 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1399 {
1400 VirtIONet *n = VIRTIO_NET(vdev);
1401 int queue_index = vq2q(virtio_get_queue_index(vq));
1402
1403 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1404 }
1405
1406 static bool virtio_net_can_receive(NetClientState *nc)
1407 {
1408 VirtIONet *n = qemu_get_nic_opaque(nc);
1409 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1410 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1411
1412 if (!vdev->vm_running) {
1413 return false;
1414 }
1415
1416 if (nc->queue_index >= n->curr_queues) {
1417 return false;
1418 }
1419
1420 if (!virtio_queue_ready(q->rx_vq) ||
1421 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1422 return false;
1423 }
1424
1425 return true;
1426 }
1427
/*
 * Return 1 (and disable rx notifications) when the rx ring can hold a
 * packet of @bufsize bytes; return 0 (leaving notifications enabled so
 * the guest kick wakes us) otherwise.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
1450
/* Byte-swap the 16-bit virtio-net header fields between guest and host
 * endianness (each call is a no-op when the endiannesses match). */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
1458
1459 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1460 * it never finds out that the packets don't have valid checksums. This
1461 * causes dhclient to get upset. Fedora's carried a patch for ages to
1462 * fix this with Xen but it hasn't appeared in an upstream release of
1463 * dhclient yet.
1464 *
1465 * To avoid breaking existing guests, we catch udp packets and add
1466 * checksums. This is terrible but it's better than hacking the guest
1467 * kernels.
1468 *
1469 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1470 * we should provide a mechanism to disable it to avoid polluting the host
1471 * cache.
1472 */
/*
 * See the comment block above: fill in the UDP checksum of BOOTP/DHCP
 * replies in place so guests running a broken dhclient accept them.
 * @buf points at the Ethernet frame (after the vnet header).
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        /* Compute the checksum and drop NEEDS_CSUM so the guest sees a
         * fully checksummed packet. */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
1485
/*
 * Write the virtio-net header for an incoming packet into the guest's
 * sg list.  With a backend vnet header present, forward it (after the
 * dhclient workaround and optional byteswap); otherwise synthesize an
 * empty header.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
1507
1508 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1509 {
1510 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1511 static const uint8_t vlan[] = {0x81, 0x00};
1512 uint8_t *ptr = (uint8_t *)buf;
1513 int i;
1514
1515 if (n->promisc)
1516 return 1;
1517
1518 ptr += n->host_hdr_len;
1519
1520 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1521 int vid = lduw_be_p(ptr + 14) & 0xfff;
1522 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1523 return 0;
1524 }
1525
1526 if (ptr[0] & 1) { // multicast
1527 if (!memcmp(ptr, bcast, sizeof(bcast))) {
1528 return !n->nobcast;
1529 } else if (n->nomulti) {
1530 return 0;
1531 } else if (n->allmulti || n->mac_table.multi_overflow) {
1532 return 1;
1533 }
1534
1535 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1536 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1537 return 1;
1538 }
1539 }
1540 } else { // unicast
1541 if (n->nouni) {
1542 return 0;
1543 } else if (n->alluni || n->mac_table.uni_overflow) {
1544 return 1;
1545 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1546 return 1;
1547 }
1548
1549 for (i = 0; i < n->mac_table.first_multi; i++) {
1550 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1551 return 1;
1552 }
1553 }
1554 }
1555
1556 return 0;
1557 }
1558
1559 static uint8_t virtio_net_get_hash_type(bool isip4,
1560 bool isip6,
1561 bool isudp,
1562 bool istcp,
1563 uint32_t types)
1564 {
1565 if (isip4) {
1566 if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1567 return NetPktRssIpV4Tcp;
1568 }
1569 if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1570 return NetPktRssIpV4Udp;
1571 }
1572 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1573 return NetPktRssIpV4;
1574 }
1575 } else if (isip6) {
1576 uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1577 VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1578
1579 if (istcp && (types & mask)) {
1580 return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1581 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1582 }
1583 mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1584 if (isudp && (types & mask)) {
1585 return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1586 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1587 }
1588 mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1589 if (types & mask) {
1590 return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1591 NetPktRssIpV6Ex : NetPktRssIpV6;
1592 }
1593 }
1594 return 0xff;
1595 }
1596
1597 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1598 uint32_t hash)
1599 {
1600 struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1601 hdr->hash_value = hash;
1602 hdr->hash_report = report;
1603 }
1604
/*
 * Compute the RSS hash for @buf, optionally store it in the packet
 * header, and decide which rx queue should take the packet.  Returns
 * the target queue index, or -1 when the packet should stay on the
 * current queue (nc->queue_index).
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    /* Maps NetPktRss* values to VIRTIO_NET_HASH_REPORT_* codes. */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    /* Fragments carry no usable L4 header: hash them as plain IP. */
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        /* No configured hash type matches this packet. */
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* indirections_len is validated as a power of two, so masking
         * is equivalent to a modulo. */
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
1657
/*
 * Core RX path (caller holds the RCU read lock): filter the packet,
 * optionally redirect it via RSS to another subqueue, then copy it
 * into one or more guest rx buffers.
 *
 * Returns @size when consumed or dropped, 0 when the guest has no
 * buffers (packet will be retried), -1 on error.
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* @no_rss breaks the recursion after a single redirect. */
    if (!no_rss && n->rss_data.enabled) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    /* Fill guest buffers until the whole packet has been copied. */
    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /* Running dry mid-packet is an error; before the first
             * buffer it just means "no buffers". */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives; it is patched in
                 * after the loop once the final count is known. */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Copy the rest of the host header past the hash area. */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        /* Patch the number of merged buffers into the first header. */
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}
1774
1775 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1776 size_t size)
1777 {
1778 RCU_READ_LOCK_GUARD();
1779
1780 return virtio_net_receive_rcu(nc, buf, size, false);
1781 }
1782
/*
 * Locate the IPv4 and TCP headers inside @buf (which starts with the
 * guest vnet header) and fill @unit with the pointers and lengths used
 * by the coalescing code.
 */
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    /* IHL is in 32-bit words; << 2 converts to bytes. */
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    /* Data offset sits in the top 4 bits, in words: >> 10 == (>> 12) * 4. */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    /* IPv4 total length includes the IP header, so subtract both headers. */
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}
1799
/*
 * IPv6 counterpart of virtio_net_rsc_extract_unit4(): locate the IPv6
 * and TCP headers in @buf and fill @unit.  Assumes no IPv6 extension
 * headers (TCP follows the fixed ip6 header directly).
 */
static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                 + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                        + sizeof(struct ip6_header));
    /* Data offset, top 4 bits in words: >> 10 == (>> 12) * 4 bytes. */
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between payload length in ipv4 and v6,
       ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}
1818
/*
 * Deliver one cached segment to the guest, unlink it from the chain
 * and free it.  Returns the virtio_net_do_receive() result (0 means
 * the receive failed).
 */
static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr_v1 *h;

    h = (struct virtio_net_hdr_v1 *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        /* Report coalescing info so the guest knows to recompute
         * checksums for the merged packet. */
        h->rsc.segments = seg->packets;
        h->rsc.dup_acks = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}
1847
/*
 * Drain-timer callback: flush every cached segment on the chain to the
 * guest, counting failures, and re-arm the timer if anything is still
 * queued afterwards (defensive; drain_seg unlinks each segment).
 */
static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}
1866
/*
 * Tear down all RSC state: free every cached segment of every chain,
 * cancel and free the per-chain drain timers, and free the chains.
 * Nothing is delivered to the guest here.
 */
static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}
1885
/*
 * Copy @buf into a freshly allocated segment (sized for the largest
 * possible coalesced packet so later appends fit) and queue it on the
 * chain's buffer list.
 */
static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    /* Re-derive header pointers into the segment's own copy of the data. */
    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}
1918
/*
 * A payload-less packet arrived for an existing segment: classify it
 * as out-of-window, duplicate ACK, window update, or pure ACK, and
 * return RSC_FINAL (drain the segment) or RSC_COALESCE (absorbed).
 */
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    /* Unsigned difference handles ack-number wraparound. */
    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack, add dup ack count due to whql test up to 1 */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize*/
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}
1954
/*
 * Try to append the payload of the new packet (@buf/@n_unit) to cached
 * segment @seg.  Returns RSC_COALESCE when merged, RSC_FINAL when the
 * segment must be drained first (out of order, oversize, dup ack, ...).
 */
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        /* Append the new payload to the cached segment's buffer. */
        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
2019
2020 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2021 VirtioNetRscSeg *seg,
2022 const uint8_t *buf, size_t size,
2023 VirtioNetRscUnit *unit)
2024 {
2025 struct ip_header *ip1, *ip2;
2026
2027 ip1 = (struct ip_header *)(unit->ip);
2028 ip2 = (struct ip_header *)(seg->unit.ip);
2029 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2030 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2031 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2032 chain->stat.no_match++;
2033 return RSC_NO_MATCH;
2034 }
2035
2036 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2037 }
2038
2039 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2040 VirtioNetRscSeg *seg,
2041 const uint8_t *buf, size_t size,
2042 VirtioNetRscUnit *unit)
2043 {
2044 struct ip6_header *ip1, *ip2;
2045
2046 ip1 = (struct ip6_header *)(unit->ip);
2047 ip2 = (struct ip6_header *)(seg->unit.ip);
2048 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2049 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2050 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2051 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2052 chain->stat.no_match++;
2053 return RSC_NO_MATCH;
2054 }
2055
2056 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2057 }
2058
2059 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2060 * to prevent out of order */
2061 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2062 struct tcp_header *tcp)
2063 {
2064 uint16_t tcp_hdr;
2065 uint16_t tcp_flag;
2066
2067 tcp_flag = htons(tcp->th_offset_flags);
2068 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2069 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2070 tcp_flag = htons(tcp->th_offset_flags) & 0x3F;
2071 if (tcp_flag & TH_SYN) {
2072 chain->stat.tcp_syn++;
2073 return RSC_BYPASS;
2074 }
2075
2076 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2077 chain->stat.tcp_ctrl_drain++;
2078 return RSC_FINAL;
2079 }
2080
2081 if (tcp_hdr > sizeof(struct tcp_header)) {
2082 chain->stat.tcp_all_opt++;
2083 return RSC_FINAL;
2084 }
2085
2086 return RSC_CANDIDATE;
2087 }
2088
/*
 * Try to merge @buf into any cached segment of @chain.  An empty cache
 * just stores the packet and arms the drain timer.  RSC_FINAL drains
 * the matching segment and forwards the packet unmerged; RSC_NO_MATCH
 * keeps searching; anything else means the packet was absorbed.
 */
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
              qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    /* No flow matched: cache the packet as a new segment. */
    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
2134
/* Drain a connection data, this is to avoid out of order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    /* Compare src+dst port as one 32-bit load.
     * NOTE(review): this is an unaligned, type-punned read of the packet
     * buffer — presumably relied upon to work on supported hosts; confirm. */
    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        /* Skip segments belonging to a different flow. */
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    /* Deliver the triggering packet itself unmodified. */
    return virtio_net_do_receive(nc, buf, size);
}
2161
2162 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2163 struct ip_header *ip,
2164 const uint8_t *buf, size_t size)
2165 {
2166 uint16_t ip_len;
2167
2168 /* Not an ipv4 packet */
2169 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2170 chain->stat.ip_option++;
2171 return RSC_BYPASS;
2172 }
2173
2174 /* Don't handle packets with ip option */
2175 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2176 chain->stat.ip_option++;
2177 return RSC_BYPASS;
2178 }
2179
2180 if (ip->ip_p != IPPROTO_TCP) {
2181 chain->stat.bypass_not_tcp++;
2182 return RSC_BYPASS;
2183 }
2184
2185 /* Don't handle packets with ip fragment */
2186 if (!(htons(ip->ip_off) & IP_DF)) {
2187 chain->stat.ip_frag++;
2188 return RSC_BYPASS;
2189 }
2190
2191 /* Don't handle packets with ecn flag */
2192 if (IPTOS_ECN(ip->ip_tos)) {
2193 chain->stat.ip_ecn++;
2194 return RSC_BYPASS;
2195 }
2196
2197 ip_len = htons(ip->ip_len);
2198 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2199 || ip_len > (size - chain->n->guest_hdr_len -
2200 sizeof(struct eth_header))) {
2201 chain->stat.ip_hacked++;
2202 return RSC_BYPASS;
2203 }
2204
2205 return RSC_CANDIDATE;
2206 }
2207
2208 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2209 NetClientState *nc,
2210 const uint8_t *buf, size_t size)
2211 {
2212 int32_t ret;
2213 uint16_t hdr_len;
2214 VirtioNetRscUnit unit;
2215
2216 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2217
2218 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2219 + sizeof(struct tcp_header))) {
2220 chain->stat.bypass_not_tcp++;
2221 return virtio_net_do_receive(nc, buf, size);
2222 }
2223
2224 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2225 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2226 != RSC_CANDIDATE) {
2227 return virtio_net_do_receive(nc, buf, size);
2228 }
2229
2230 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2231 if (ret == RSC_BYPASS) {
2232 return virtio_net_do_receive(nc, buf, size);
2233 } else if (ret == RSC_FINAL) {
2234 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2235 ((hdr_len + sizeof(struct eth_header)) + 12),
2236 VIRTIO_NET_IP4_ADDR_SIZE,
2237 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2238 }
2239
2240 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2241 }
2242
/*
 * Decide whether an IPv6 datagram is eligible for receive coalescing.
 * Returns RSC_CANDIDATE when it may be coalesced, RSC_BYPASS otherwise.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /*
     * NOTE(review): the version nibble is extracted from the low byte of
     * the 32-bit flow word; this presumably relies on the in-memory byte
     * layout matching a little-endian host read — confirm behaviour on
     * big-endian hosts.
     */
    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both option and protocol is checked in this: any extension header
     * makes next-header != TCP, so options are rejected as well */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Payload length must cover the TCP header and fit in the buffer */
    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
2276
2277 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2278 const uint8_t *buf, size_t size)
2279 {
2280 int32_t ret;
2281 uint16_t hdr_len;
2282 VirtioNetRscChain *chain;
2283 VirtioNetRscUnit unit;
2284
2285 chain = (VirtioNetRscChain *)opq;
2286 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2287
2288 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2289 + sizeof(tcp_header))) {
2290 return virtio_net_do_receive(nc, buf, size);
2291 }
2292
2293 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2294 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2295 unit.ip, buf, size)) {
2296 return virtio_net_do_receive(nc, buf, size);
2297 }
2298
2299 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2300 if (ret == RSC_BYPASS) {
2301 return virtio_net_do_receive(nc, buf, size);
2302 } else if (ret == RSC_FINAL) {
2303 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2304 ((hdr_len + sizeof(struct eth_header)) + 8),
2305 VIRTIO_NET_IP6_ADDR_SIZE,
2306 hdr_len + sizeof(struct eth_header)
2307 + sizeof(struct ip6_header));
2308 }
2309
2310 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2311 }
2312
2313 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2314 NetClientState *nc,
2315 uint16_t proto)
2316 {
2317 VirtioNetRscChain *chain;
2318
2319 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2320 return NULL;
2321 }
2322
2323 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2324 if (chain->proto == proto) {
2325 return chain;
2326 }
2327 }
2328
2329 chain = g_malloc(sizeof(*chain));
2330 chain->n = n;
2331 chain->proto = proto;
2332 if (proto == (uint16_t)ETH_P_IP) {
2333 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2334 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2335 } else {
2336 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2337 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2338 }
2339 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2340 virtio_net_rsc_purge, chain);
2341 memset(&chain->stat, 0, sizeof(chain->stat));
2342
2343 QTAILQ_INIT(&chain->buffers);
2344 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2345
2346 return chain;
2347 }
2348
2349 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2350 const uint8_t *buf,
2351 size_t size)
2352 {
2353 uint16_t proto;
2354 VirtioNetRscChain *chain;
2355 struct eth_header *eth;
2356 VirtIONet *n;
2357
2358 n = qemu_get_nic_opaque(nc);
2359 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2360 return virtio_net_do_receive(nc, buf, size);
2361 }
2362
2363 eth = (struct eth_header *)(buf + n->guest_hdr_len);
2364 proto = htons(eth->h_proto);
2365
2366 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2367 if (chain) {
2368 chain->stat.received++;
2369 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2370 return virtio_net_rsc_receive4(chain, nc, buf, size);
2371 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2372 return virtio_net_rsc_receive6(chain, nc, buf, size);
2373 }
2374 }
2375 return virtio_net_do_receive(nc, buf, size);
2376 }
2377
2378 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2379 size_t size)
2380 {
2381 VirtIONet *n = qemu_get_nic_opaque(nc);
2382 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2383 return virtio_net_rsc_receive(nc, buf, size);
2384 } else {
2385 return virtio_net_do_receive(nc, buf, size);
2386 }
2387 }
2388
2389 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2390
/*
 * Completion callback for an asynchronous qemu_sendv_packet_async() TX:
 * retire the in-flight element, re-enable queue notifications, and flush
 * any packets that queued up while the send was pending.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /* Return the element to the guest before freeing our reference */
    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    /* The queue was throttled while the async send was outstanding */
    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2406
/* TX */
/*
 * Pop and transmit up to tx_burst elements from the TX virtqueue.
 * Returns the number of packets sent, -EINVAL when the device has been
 * marked broken, or -EBUSY when the backend could not accept a packet
 * (completion is then driven by virtio_net_tx_complete).
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* A send is already in flight: stay throttled until it completes */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /*
                 * Byte-swap the header into a local copy and rebuild the
                 * iovec so the swapped header replaces the guest's one;
                 * sg2[0] holds the header, the rest is the original
                 * payload past guest_hdr_len.
                 */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    /* Payload spans too many elements: drop the packet */
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Backend busy: park the element and wait for tx_complete */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
2502
/*
 * TX virtqueue kick handler for the "timer" mitigation mode: the first
 * kick arms a timer; a second kick before it fires flushes immediately
 * (the guest is clearly producing faster than the timeout).
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        /* Remember we were kicked so the flush runs once we restart */
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        /* Second kick while the timer is pending: flush right away */
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        /* First kick: defer the flush and suppress further notifications */
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}
2533
/*
 * TX virtqueue kick handler for the bottom-half mitigation mode: defer
 * the actual flush to q->tx_bh so several kicks coalesce into one pass.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* A flush is already scheduled; nothing more to do */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    /* Set tx_waiting before the vm_running check so a stopped VM still
     * knows to flush when it restarts */
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}
2555
/*
 * Expiry callback of the TX mitigation timer: re-enable notifications
 * and flush whatever the guest queued since the timer was armed.
 */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready anymore */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2578
/*
 * TX bottom half: flush the queue, rescheduling itself while the guest
 * keeps a full burst of packets coming (notification stays disabled so
 * kicks don't pile up), and re-enabling notifications once idle.
 */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready anymore */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
2627
2628 static void virtio_net_add_queue(VirtIONet *n, int index)
2629 {
2630 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2631
2632 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2633 virtio_net_handle_rx);
2634
2635 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2636 n->vqs[index].tx_vq =
2637 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2638 virtio_net_handle_tx_timer);
2639 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2640 virtio_net_tx_timer,
2641 &n->vqs[index]);
2642 } else {
2643 n->vqs[index].tx_vq =
2644 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2645 virtio_net_handle_tx_bh);
2646 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2647 }
2648
2649 n->vqs[index].tx_waiting = 0;
2650 n->vqs[index].n = n;
2651 }
2652
/*
 * Tear down the RX/TX virtqueue pair for queue 'index' together with its
 * TX mitigation timer or bottom half.  Virtqueue layout: RX is index*2,
 * TX is index*2+1.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    /* Drop packets still queued towards the guest for this subqueue */
    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        /* tx=timer mode */
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        /* bottom-half mode */
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
2673
/*
 * Grow or shrink the device's virtqueue set to 'new_max_queues' RX/TX
 * pairs.  The control queue must always be the last virtqueue, so it is
 * removed first and re-added last.
 */
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;  /* pairs + ctrl vq */
    int i;

    /* At least one RX/TX pair plus the control queue, always odd */
    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
2708
2709 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2710 {
2711 int max = multiqueue ? n->max_queues : 1;
2712
2713 n->multiqueue = multiqueue;
2714 virtio_net_change_num_queues(n, max);
2715
2716 virtio_net_set_queues(n);
2717 }
2718
/*
 * Migration post_load for the device section: re-derive all runtime
 * state that is not migrated directly (merge-buffer layout, queue
 * wiring, MAC filter bookkeeping, link state, announce timer, RSS).
 * Returns 0 on success.
 */
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /* Resume a guest-announce sequence that was in flight on the source */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}
2788
/*
 * Virtio-level post_load hook, called after virtio_load has re-applied
 * the negotiated features (which clobbers curr_guest_offloads).
 * Returns 0 on success.
 */
static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}
2804
/* tx_waiting field of a VirtIONetQueue; used both for vqs[0] in the main
 * device section and for the remaining queues via the tx_waiting tmp.
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
2813
2814 static bool max_queues_gt_1(void *opaque, int version_id)
2815 {
2816 return VIRTIO_NET(opaque)->max_queues > 1;
2817 }
2818
2819 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2820 {
2821 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2822 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2823 }
2824
2825 static bool mac_table_fits(void *opaque, int version_id)
2826 {
2827 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2828 }
2829
/* vmstate predicate: complement of mac_table_fits, used to discard an
 * oversized incoming MAC table instead of failing the migration. */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}
2834
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet *parent;        /* device this temporary state belongs to */
    VirtIONetQueue *vqs_1;    /* points at parent->vqs[1]: queues after the first */
    uint16_t curr_queues_1;   /* number of queues after the first */
    uint8_t has_ufo;          /* saved/checked copy of parent->has_ufo */
    uint32_t has_vnet_hdr;    /* saved/checked copy of parent->has_vnet_hdr */
};
2845
2846 /* The 2nd and subsequent tx_waiting flags are loaded later than
2847 * the 1st entry in the queues and only if there's more than one
2848 * entry. We use the tmp mechanism to calculate a temporary
2849 * pointer and count and also validate the count.
2850 */
2851
2852 static int virtio_net_tx_waiting_pre_save(void *opaque)
2853 {
2854 struct VirtIONetMigTmp *tmp = opaque;
2855
2856 tmp->vqs_1 = tmp->parent->vqs + 1;
2857 tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2858 if (tmp->parent->curr_queues == 0) {
2859 tmp->curr_queues_1 = 0;
2860 }
2861
2862 return 0;
2863 }
2864
/*
 * pre_load counterpart: set up the same pointer/count view and reject a
 * stream whose queue count exceeds what this device was created with.
 * Returning -EINVAL fails the incoming migration.
 */
static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);

        return -EINVAL;
    }

    return 0; /* all good */
}
2881
/* Migrates the tx_waiting flags of vqs[1..curr_queues-1] via the
 * VirtIONetMigTmp pointer/count computed in the pre_save/pre_load hooks.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load = virtio_net_tx_waiting_pre_load,
    .pre_save = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
2894
2895 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2896 * flag set we need to check that we have it
2897 */
2898 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2899 {
2900 struct VirtIONetMigTmp *tmp = opaque;
2901
2902 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2903 error_report("virtio-net: saved image requires TUN_F_UFO support");
2904 return -EINVAL;
2905 }
2906
2907 return 0;
2908 }
2909
2910 static int virtio_net_ufo_pre_save(void *opaque)
2911 {
2912 struct VirtIONetMigTmp *tmp = opaque;
2913
2914 tmp->has_ufo = tmp->parent->has_ufo;
2915
2916 return 0;
2917 }
2918
/* Migrates the has_ufo capability flag so the destination can verify
 * its backend offers UFO before accepting the stream. */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2928
2929 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2930 * flag set we need to check that we have it
2931 */
2932 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2933 {
2934 struct VirtIONetMigTmp *tmp = opaque;
2935
2936 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2937 error_report("virtio-net: saved image requires vnet_hdr=on");
2938 return -EINVAL;
2939 }
2940
2941 return 0;
2942 }
2943
2944 static int virtio_net_vnet_pre_save(void *opaque)
2945 {
2946 struct VirtIONetMigTmp *tmp = opaque;
2947
2948 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2949
2950 return 0;
2951 }
2952
/* Migrates the has_vnet_hdr capability flag so the destination can verify
 * its backend supports vnet headers before accepting the stream. */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
2962
2963 static bool virtio_net_rss_needed(void *opaque)
2964 {
2965 return VIRTIO_NET(opaque)->rss_data.enabled;
2966 }
2967
/* Optional subsection carrying the negotiated RSS configuration; the
 * indirection table is variable-length and allocated on load. */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
2988
/* Main migration description of the virtio-net device state.  Field order
 * is ABI: it must match what every supported source version emits. */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* Only vqs[0] here; the rest migrates via the tx_waiting tmp below */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table.; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3044
/* Netclient callbacks wiring the virtio-net device model into the
 * networking core. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3054
/*
 * Guest-notifier query, forwarded to vhost.  Only valid while vhost is
 * running (it owns the virtqueues then), hence the assert.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
3062
/*
 * Guest-notifier mask/unmask, forwarded to vhost.  Only valid while
 * vhost is running, hence the assert.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}
3072
/*
 * Compute the config-space size from the feature set.  MAC is always
 * present in config space; host_features is a by-value copy, so forcing
 * the bit here does not change the device's advertised features.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}
3080
3081 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3082 const char *type)
3083 {
3084 /*
3085 * The name can be NULL, the netclient name will be type.x.
3086 */
3087 assert(type != NULL);
3088
3089 g_free(n->netclient_name);
3090 g_free(n->netclient_type);
3091 n->netclient_name = g_strdup(name);
3092 n->netclient_type = g_strdup(type);
3093 }
3094
3095 static bool failover_unplug_primary(VirtIONet *n)
3096 {
3097 HotplugHandler *hotplug_ctrl;
3098 PCIDevice *pci_dev;
3099 Error *err = NULL;
3100
3101 hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3102 if (hotplug_ctrl) {
3103 pci_dev = PCI_DEVICE(n->primary_dev);
3104 pci_dev->partially_hotplugged = true;
3105 hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
3106 if (err) {
3107 error_report_err(err);
3108 return false;
3109 }
3110 } else {
3111 return false;
3112 }
3113 return true;
3114 }
3115
/*
 * Re-plug the failover primary device after a failed migration.
 * Returns true on success (or when the device was never partially
 * unplugged); on failure returns false with *errp set.
 */
static bool failover_replug_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(n->primary_dev);

    /* Nothing to do unless the device went through a partial unplug */
    if (!pdev->partially_hotplugged) {
        return true;
    }
    /* Lazily rebuild the -device options from the recorded qdict */
    if (!n->primary_device_opts) {
        n->primary_device_opts = qemu_opts_from_qdict(
            qemu_find_opts("device"),
            n->primary_device_dict, errp);
        if (!n->primary_device_opts) {
            return false;
        }
    }
    n->primary_bus = n->primary_dev->parent_bus;
    if (!n->primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(n->primary_dev, n->primary_bus);
    n->primary_should_be_hidden = false;
    qemu_opt_set_bool(n->primary_device_opts,
                      "partially_hotplugged", true, &err);
    if (err) {
        goto out;
    }
    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, n->primary_dev, errp);
    }

out:
    error_propagate(errp, err);
    return !err;
}
3158
/*
 * React to migration state changes for the failover primary device:
 * unplug it when migration setup starts, re-plug it if migration fails.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n,
                                                MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;

    should_be_hidden = atomic_read(&n->primary_should_be_hidden);

    /* Resolve the primary device lazily on the first notification */
    if (!n->primary_dev) {
        n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
        if (!n->primary_dev) {
            return;
        }
    }

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n)) {
            /* Drop its vmstate so the primary is not part of the stream */
            vmstate_unregister(VMSTATE_IF(n->primary_dev),
                               qdev_get_vmsd(n->primary_dev),
                               n->primary_dev);
            qapi_event_send_unplug_primary(n->primary_device_id);
            atomic_set(&n->primary_should_be_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device let's plug it back */
        if (!failover_replug_primary(n, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}
3193
3194 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3195 {
3196 MigrationState *s = data;
3197 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3198 virtio_net_handle_migration_primary(n, s);
3199 }
3200
3201 static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
3202 QemuOpts *device_opts)
3203 {
3204 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3205 bool match_found = false;
3206 bool hide = false;
3207
3208 if (!device_opts) {
3209 return -1;
3210 }
3211 n->primary_device_dict = qemu_opts_to_qdict(device_opts,
3212 n->primary_device_dict);
3213 if (n->primary_device_dict) {
3214 g_free(n->standby_id);
3215 n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
3216 "failover_pair_id"));
3217 }
3218 if (g_strcmp0(n->standby_id, n->netclient_name) == 0) {
3219 match_found = true;
3220 } else {
3221 match_found = false;
3222 hide = false;
3223 g_free(n->standby_id);
3224 n->primary_device_dict = NULL;
3225 goto out;
3226 }
3227
3228 n->primary_device_opts = device_opts;
3229
3230 /* primary_should_be_hidden is set during feature negotiation */
3231 hide = atomic_read(&n->primary_should_be_hidden);
3232
3233 if (n->primary_device_dict) {
3234 g_free(n->primary_device_id);
3235 n->primary_device_id = g_strdup(qdict_get_try_str(
3236 n->primary_device_dict, "id"));
3237 if (!n->primary_device_id) {
3238 warn_report("primary_device_id not set");
3239 }
3240 }
3241
3242 out:
3243 if (match_found && hide) {
3244 return 1;
3245 } else if (match_found && !hide) {
3246 return 0;
3247 } else {
3248 return -1;
3249 }
3250 }
3251
/*
 * Realize the virtio-net device: translate user configuration into host
 * feature bits, validate queue sizes and counts, create the RX/TX virtqueues
 * plus the control virtqueue, instantiate the backing NIC and wire up
 * failover support when enabled.
 *
 * On a validation failure sets @errp and returns; virtio_cleanup() is called
 * on the paths reached after virtio_init() so the transport is torn down.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    /* A user-specified MTU lets us offer VIRTIO_NET_F_MTU to the guest. */
    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /* Optional duplex setting: only "half" or "full" are accepted. */
    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    /* speed below SPEED_UNKNOWN (i.e. negative other than "unknown") is
     * invalid; any non-negative speed enables the SPEED_DUPLEX feature. */
    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    /* Failover support: watch device creation and migration state so the
     * primary device can be hidden/unplugged around migration. */
    if (n->failover) {
        n->primary_listener.should_be_hidden =
            virtio_net_primary_should_be_hidden;
        atomic_set(&n->primary_should_be_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    /* Each queue pair needs an RX and a TX vq, plus one control vq. */
    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    /* tx= selects the TX flush strategy; anything else falls back to "bh". */
    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    /* The control vq comes after all RX/TX queues. */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    /* Enable vnet headers on every subqueue if the peer supports them. */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* One bit per VLAN id (4096 ids -> 512 bytes). */
    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);
}
3401
/*
 * Unrealize the device: stop the backend, release failover state, delete
 * all virtqueues (data queues first, then the control vq that sits after
 * them), and free every allocation made in realize.  Order matters: the
 * backend is stopped before any queue is torn down.
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        g_free(n->primary_device_id);
        g_free(n->standby_id);
        qobject_unref(n->primary_device_dict);
        n->primary_device_dict = NULL;
    }

    /* Only queue 0 was populated when multiqueue was never negotiated. */
    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }
    /* delete also control vq */
    virtio_del_queue(vdev, max_queues * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
3440
3441 static void virtio_net_instance_init(Object *obj)
3442 {
3443 VirtIONet *n = VIRTIO_NET(obj);
3444
3445 /*
3446 * The default config_size is sizeof(struct virtio_net_config).
3447 * Can be overriden with virtio_net_set_config_size.
3448 */
3449 n->config_size = sizeof(struct virtio_net_config);
3450 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3451 "bootindex", "/ethernet-phy@0",
3452 DEVICE(n));
3453 }
3454
3455 static int virtio_net_pre_save(void *opaque)
3456 {
3457 VirtIONet *n = opaque;
3458
3459 /* At this point, backend must be stopped, otherwise
3460 * it might keep writing to memory. */
3461 assert(!n->vhost_started);
3462
3463 return 0;
3464 }
3465
3466 static bool primary_unplug_pending(void *opaque)
3467 {
3468 DeviceState *dev = opaque;
3469 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3470 VirtIONet *n = VIRTIO_NET(vdev);
3471
3472 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3473 return false;
3474 }
3475 return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
3476 }
3477
3478 static bool dev_unplug_pending(void *opaque)
3479 {
3480 DeviceState *dev = opaque;
3481 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3482
3483 return vdc->primary_unplug_pending(dev);
3484 }
3485
/*
 * Top-level migration description for virtio-net.  The real payload is
 * handled through VMSTATE_VIRTIO_DEVICE; dev_unplug_pending lets the
 * migration core wait for the failover primary's unplug to complete.
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
3497
3498 static Property virtio_net_properties[] = {
3499 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3500 VIRTIO_NET_F_CSUM, true),
3501 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3502 VIRTIO_NET_F_GUEST_CSUM, true),
3503 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3504 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3505 VIRTIO_NET_F_GUEST_TSO4, true),
3506 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3507 VIRTIO_NET_F_GUEST_TSO6, true),
3508 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3509 VIRTIO_NET_F_GUEST_ECN, true),
3510 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3511 VIRTIO_NET_F_GUEST_UFO, true),
3512 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3513 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3514 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3515 VIRTIO_NET_F_HOST_TSO4, true),
3516 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3517 VIRTIO_NET_F_HOST_TSO6, true),
3518 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3519 VIRTIO_NET_F_HOST_ECN, true),
3520 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3521 VIRTIO_NET_F_HOST_UFO, true),
3522 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3523 VIRTIO_NET_F_MRG_RXBUF, true),
3524 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3525 VIRTIO_NET_F_STATUS, true),
3526 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3527 VIRTIO_NET_F_CTRL_VQ, true),
3528 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3529 VIRTIO_NET_F_CTRL_RX, true),
3530 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3531 VIRTIO_NET_F_CTRL_VLAN, true),
3532 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3533 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3534 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3535 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3536 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3537 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),