linux-user: Add support for btrfs ioctls used to scrub a filesystem
[qemu.git] / softmmu / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu/osdep.h"
26 #include "qemu-common.h"
27 #include "qemu/config-file.h"
28 #include "qemu/cutils.h"
29 #include "migration/vmstate.h"
30 #include "monitor/monitor.h"
31 #include "qapi/error.h"
32 #include "qapi/qapi-commands-misc.h"
33 #include "qapi/qapi-events-run-state.h"
34 #include "qapi/qmp/qerror.h"
35 #include "qemu/error-report.h"
36 #include "qemu/qemu-print.h"
37 #include "sysemu/tcg.h"
38 #include "sysemu/block-backend.h"
39 #include "exec/gdbstub.h"
40 #include "sysemu/dma.h"
41 #include "sysemu/hw_accel.h"
42 #include "sysemu/kvm.h"
43 #include "sysemu/hax.h"
44 #include "sysemu/hvf.h"
45 #include "sysemu/whpx.h"
46 #include "exec/exec-all.h"
47
48 #include "qemu/thread.h"
49 #include "qemu/plugin.h"
50 #include "sysemu/cpus.h"
51 #include "sysemu/qtest.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/option.h"
54 #include "qemu/bitmap.h"
55 #include "qemu/seqlock.h"
56 #include "qemu/guest-random.h"
57 #include "tcg/tcg.h"
58 #include "hw/nmi.h"
59 #include "sysemu/replay.h"
60 #include "sysemu/runstate.h"
61 #include "hw/boards.h"
62 #include "hw/hw.h"
63
64 #include "sysemu/cpu-throttle.h"
65
#ifdef CONFIG_LINUX

#include <sys/prctl.h>

/*
 * Fallback values for the machine-check prctl() constants, used only
 * when the host's <sys/prctl.h> does not already define them.
 */
#if !defined(PR_MCE_KILL)
#define PR_MCE_KILL 33
#endif

#if !defined(PR_MCE_KILL_SET)
#define PR_MCE_KILL_SET 1
#endif

#if !defined(PR_MCE_KILL_EARLY)
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */
83
84 static QemuMutex qemu_global_mutex;
85
86 int64_t max_delay;
87 int64_t max_advance;
88
89 bool cpu_is_stopped(CPUState *cpu)
90 {
91 return cpu->stopped || !runstate_is_running();
92 }
93
94 static inline bool cpu_work_list_empty(CPUState *cpu)
95 {
96 bool ret;
97
98 qemu_mutex_lock(&cpu->work_mutex);
99 ret = QSIMPLEQ_EMPTY(&cpu->work_list);
100 qemu_mutex_unlock(&cpu->work_mutex);
101 return ret;
102 }
103
104 static bool cpu_thread_is_idle(CPUState *cpu)
105 {
106 if (cpu->stop || !cpu_work_list_empty(cpu)) {
107 return false;
108 }
109 if (cpu_is_stopped(cpu)) {
110 return true;
111 }
112 if (!cpu->halted || cpu_has_work(cpu) ||
113 kvm_halt_in_kernel()) {
114 return false;
115 }
116 return true;
117 }
118
119 static bool all_cpu_threads_idle(void)
120 {
121 CPUState *cpu;
122
123 CPU_FOREACH(cpu) {
124 if (!cpu_thread_is_idle(cpu)) {
125 return false;
126 }
127 }
128 return true;
129 }
130
131 /***********************************************************/
132 /* guest cycle counter */
133
134 /* Protected by TimersState seqlock */
135
/* Set from the "sleep" option in configure_icount(); defaults to on. */
static bool icount_sleep = true;

/*
 * Largest permitted icount_time_shift.
 * Arbitrarily pick 1MIPS as the minimum allowable speed.
 */
#define MAX_ICOUNT_SHIFT 10
139
140 typedef struct TimersState {
141 /* Protected by BQL. */
142 int64_t cpu_ticks_prev;
143 int64_t cpu_ticks_offset;
144
145 /* Protect fields that can be respectively read outside the
146 * BQL, and written from multiple threads.
147 */
148 QemuSeqLock vm_clock_seqlock;
149 QemuSpin vm_clock_lock;
150
151 int16_t cpu_ticks_enabled;
152
153 /* Conversion factor from emulated instructions to virtual clock ticks. */
154 int16_t icount_time_shift;
155
156 /* Compensate for varying guest execution speed. */
157 int64_t qemu_icount_bias;
158
159 int64_t vm_clock_warp_start;
160 int64_t cpu_clock_offset;
161
162 /* Only written by TCG thread */
163 int64_t qemu_icount;
164
165 /* for adjusting icount */
166 QEMUTimer *icount_rt_timer;
167 QEMUTimer *icount_vm_timer;
168 QEMUTimer *icount_warp_timer;
169 } TimersState;
170
171 static TimersState timers_state;
172 bool mttcg_enabled;
173
174
175 /* The current number of executed instructions is based on what we
176 * originally budgeted minus the current state of the decrementing
177 * icount counters in extra/u16.low.
178 */
179 static int64_t cpu_get_icount_executed(CPUState *cpu)
180 {
181 return (cpu->icount_budget -
182 (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
183 }
184
185 /*
186 * Update the global shared timer_state.qemu_icount to take into
187 * account executed instructions. This is done by the TCG vCPU
188 * thread so the main-loop can see time has moved forward.
189 */
190 static void cpu_update_icount_locked(CPUState *cpu)
191 {
192 int64_t executed = cpu_get_icount_executed(cpu);
193 cpu->icount_budget -= executed;
194
195 atomic_set_i64(&timers_state.qemu_icount,
196 timers_state.qemu_icount + executed);
197 }
198
199 /*
200 * Update the global shared timer_state.qemu_icount to take into
201 * account executed instructions. This is done by the TCG vCPU
202 * thread so the main-loop can see time has moved forward.
203 */
204 void cpu_update_icount(CPUState *cpu)
205 {
206 seqlock_write_lock(&timers_state.vm_clock_seqlock,
207 &timers_state.vm_clock_lock);
208 cpu_update_icount_locked(cpu);
209 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
210 &timers_state.vm_clock_lock);
211 }
212
213 static int64_t cpu_get_icount_raw_locked(void)
214 {
215 CPUState *cpu = current_cpu;
216
217 if (cpu && cpu->running) {
218 if (!cpu->can_do_io) {
219 error_report("Bad icount read");
220 exit(1);
221 }
222 /* Take into account what has run */
223 cpu_update_icount_locked(cpu);
224 }
225 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
226 return atomic_read_i64(&timers_state.qemu_icount);
227 }
228
229 static int64_t cpu_get_icount_locked(void)
230 {
231 int64_t icount = cpu_get_icount_raw_locked();
232 return atomic_read_i64(&timers_state.qemu_icount_bias) +
233 cpu_icount_to_ns(icount);
234 }
235
236 int64_t cpu_get_icount_raw(void)
237 {
238 int64_t icount;
239 unsigned start;
240
241 do {
242 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
243 icount = cpu_get_icount_raw_locked();
244 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
245
246 return icount;
247 }
248
249 /* Return the virtual CPU time, based on the instruction counter. */
250 int64_t cpu_get_icount(void)
251 {
252 int64_t icount;
253 unsigned start;
254
255 do {
256 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
257 icount = cpu_get_icount_locked();
258 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
259
260 return icount;
261 }
262
263 int64_t cpu_icount_to_ns(int64_t icount)
264 {
265 return icount << atomic_read(&timers_state.icount_time_shift);
266 }
267
268 static int64_t cpu_get_ticks_locked(void)
269 {
270 int64_t ticks = timers_state.cpu_ticks_offset;
271 if (timers_state.cpu_ticks_enabled) {
272 ticks += cpu_get_host_ticks();
273 }
274
275 if (timers_state.cpu_ticks_prev > ticks) {
276 /* Non increasing ticks may happen if the host uses software suspend. */
277 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
278 ticks = timers_state.cpu_ticks_prev;
279 }
280
281 timers_state.cpu_ticks_prev = ticks;
282 return ticks;
283 }
284
285 /* return the time elapsed in VM between vm_start and vm_stop. Unless
286 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
287 * counter.
288 */
289 int64_t cpu_get_ticks(void)
290 {
291 int64_t ticks;
292
293 if (use_icount) {
294 return cpu_get_icount();
295 }
296
297 qemu_spin_lock(&timers_state.vm_clock_lock);
298 ticks = cpu_get_ticks_locked();
299 qemu_spin_unlock(&timers_state.vm_clock_lock);
300 return ticks;
301 }
302
303 static int64_t cpu_get_clock_locked(void)
304 {
305 int64_t time;
306
307 time = timers_state.cpu_clock_offset;
308 if (timers_state.cpu_ticks_enabled) {
309 time += get_clock();
310 }
311
312 return time;
313 }
314
315 /* Return the monotonic time elapsed in VM, i.e.,
316 * the time between vm_start and vm_stop
317 */
318 int64_t cpu_get_clock(void)
319 {
320 int64_t ti;
321 unsigned start;
322
323 do {
324 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
325 ti = cpu_get_clock_locked();
326 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
327
328 return ti;
329 }
330
331 /* enable cpu_get_ticks()
332 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
333 */
334 void cpu_enable_ticks(void)
335 {
336 seqlock_write_lock(&timers_state.vm_clock_seqlock,
337 &timers_state.vm_clock_lock);
338 if (!timers_state.cpu_ticks_enabled) {
339 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
340 timers_state.cpu_clock_offset -= get_clock();
341 timers_state.cpu_ticks_enabled = 1;
342 }
343 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
344 &timers_state.vm_clock_lock);
345 }
346
347 /* disable cpu_get_ticks() : the clock is stopped. You must not call
348 * cpu_get_ticks() after that.
349 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
350 */
351 void cpu_disable_ticks(void)
352 {
353 seqlock_write_lock(&timers_state.vm_clock_seqlock,
354 &timers_state.vm_clock_lock);
355 if (timers_state.cpu_ticks_enabled) {
356 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
357 timers_state.cpu_clock_offset = cpu_get_clock_locked();
358 timers_state.cpu_ticks_enabled = 0;
359 }
360 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
361 &timers_state.vm_clock_lock);
362 }
363
/*
 * Tolerance used by icount_adjust() (a tenth of a second).
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.  When the guest is
 * idle real and virtual time will be aligned in the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
369
370 static void icount_adjust(void)
371 {
372 int64_t cur_time;
373 int64_t cur_icount;
374 int64_t delta;
375
376 /* Protected by TimersState mutex. */
377 static int64_t last_delta;
378
379 /* If the VM is not running, then do nothing. */
380 if (!runstate_is_running()) {
381 return;
382 }
383
384 seqlock_write_lock(&timers_state.vm_clock_seqlock,
385 &timers_state.vm_clock_lock);
386 cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
387 cpu_get_clock_locked());
388 cur_icount = cpu_get_icount_locked();
389
390 delta = cur_icount - cur_time;
391 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
392 if (delta > 0
393 && last_delta + ICOUNT_WOBBLE < delta * 2
394 && timers_state.icount_time_shift > 0) {
395 /* The guest is getting too far ahead. Slow time down. */
396 atomic_set(&timers_state.icount_time_shift,
397 timers_state.icount_time_shift - 1);
398 }
399 if (delta < 0
400 && last_delta - ICOUNT_WOBBLE > delta * 2
401 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
402 /* The guest is getting too far behind. Speed time up. */
403 atomic_set(&timers_state.icount_time_shift,
404 timers_state.icount_time_shift + 1);
405 }
406 last_delta = delta;
407 atomic_set_i64(&timers_state.qemu_icount_bias,
408 cur_icount - (timers_state.qemu_icount
409 << timers_state.icount_time_shift));
410 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
411 &timers_state.vm_clock_lock);
412 }
413
414 static void icount_adjust_rt(void *opaque)
415 {
416 timer_mod(timers_state.icount_rt_timer,
417 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
418 icount_adjust();
419 }
420
421 static void icount_adjust_vm(void *opaque)
422 {
423 timer_mod(timers_state.icount_vm_timer,
424 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
425 NANOSECONDS_PER_SECOND / 10);
426 icount_adjust();
427 }
428
429 static int64_t qemu_icount_round(int64_t count)
430 {
431 int shift = atomic_read(&timers_state.icount_time_shift);
432 return (count + (1 << shift) - 1) >> shift;
433 }
434
435 static void icount_warp_rt(void)
436 {
437 unsigned seq;
438 int64_t warp_start;
439
440 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
441 * changes from -1 to another value, so the race here is okay.
442 */
443 do {
444 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
445 warp_start = timers_state.vm_clock_warp_start;
446 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
447
448 if (warp_start == -1) {
449 return;
450 }
451
452 seqlock_write_lock(&timers_state.vm_clock_seqlock,
453 &timers_state.vm_clock_lock);
454 if (runstate_is_running()) {
455 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
456 cpu_get_clock_locked());
457 int64_t warp_delta;
458
459 warp_delta = clock - timers_state.vm_clock_warp_start;
460 if (use_icount == 2) {
461 /*
462 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
463 * far ahead of real time.
464 */
465 int64_t cur_icount = cpu_get_icount_locked();
466 int64_t delta = clock - cur_icount;
467 warp_delta = MIN(warp_delta, delta);
468 }
469 atomic_set_i64(&timers_state.qemu_icount_bias,
470 timers_state.qemu_icount_bias + warp_delta);
471 }
472 timers_state.vm_clock_warp_start = -1;
473 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
474 &timers_state.vm_clock_lock);
475
476 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
477 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
478 }
479 }
480
/*
 * Callback of icount_warp_timer: finish the pending warp.  @opaque is
 * unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    (void)opaque;
    icount_warp_rt();
}
488
489 void qtest_clock_warp(int64_t dest)
490 {
491 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
492 AioContext *aio_context;
493 assert(qtest_enabled());
494 aio_context = qemu_get_aio_context();
495 while (clock < dest) {
496 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
497 QEMU_TIMER_ATTR_ALL);
498 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
499
500 seqlock_write_lock(&timers_state.vm_clock_seqlock,
501 &timers_state.vm_clock_lock);
502 atomic_set_i64(&timers_state.qemu_icount_bias,
503 timers_state.qemu_icount_bias + warp);
504 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
505 &timers_state.vm_clock_lock);
506
507 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
508 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
509 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
510 }
511 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
512 }
513
514 void qemu_start_warp_timer(void)
515 {
516 int64_t clock;
517 int64_t deadline;
518
519 if (!use_icount) {
520 return;
521 }
522
523 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
524 * do not fire, so computing the deadline does not make sense.
525 */
526 if (!runstate_is_running()) {
527 return;
528 }
529
530 if (replay_mode != REPLAY_MODE_PLAY) {
531 if (!all_cpu_threads_idle()) {
532 return;
533 }
534
535 if (qtest_enabled()) {
536 /* When testing, qtest commands advance icount. */
537 return;
538 }
539
540 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
541 } else {
542 /* warp clock deterministically in record/replay mode */
543 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
544 /* vCPU is sleeping and warp can't be started.
545 It is probably a race condition: notification sent
546 to vCPU was processed in advance and vCPU went to sleep.
547 Therefore we have to wake it up for doing someting. */
548 if (replay_has_checkpoint()) {
549 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
550 }
551 return;
552 }
553 }
554
555 /* We want to use the earliest deadline from ALL vm_clocks */
556 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
557 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
558 ~QEMU_TIMER_ATTR_EXTERNAL);
559 if (deadline < 0) {
560 static bool notified;
561 if (!icount_sleep && !notified) {
562 warn_report("icount sleep disabled and no active timers");
563 notified = true;
564 }
565 return;
566 }
567
568 if (deadline > 0) {
569 /*
570 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
571 * sleep. Otherwise, the CPU might be waiting for a future timer
572 * interrupt to wake it up, but the interrupt never comes because
573 * the vCPU isn't running any insns and thus doesn't advance the
574 * QEMU_CLOCK_VIRTUAL.
575 */
576 if (!icount_sleep) {
577 /*
578 * We never let VCPUs sleep in no sleep icount mode.
579 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
580 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
581 * It is useful when we want a deterministic execution time,
582 * isolated from host latencies.
583 */
584 seqlock_write_lock(&timers_state.vm_clock_seqlock,
585 &timers_state.vm_clock_lock);
586 atomic_set_i64(&timers_state.qemu_icount_bias,
587 timers_state.qemu_icount_bias + deadline);
588 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
589 &timers_state.vm_clock_lock);
590 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
591 } else {
592 /*
593 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
594 * "real" time, (related to the time left until the next event) has
595 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
596 * This avoids that the warps are visible externally; for example,
597 * you will not be sending network packets continuously instead of
598 * every 100ms.
599 */
600 seqlock_write_lock(&timers_state.vm_clock_seqlock,
601 &timers_state.vm_clock_lock);
602 if (timers_state.vm_clock_warp_start == -1
603 || timers_state.vm_clock_warp_start > clock) {
604 timers_state.vm_clock_warp_start = clock;
605 }
606 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
607 &timers_state.vm_clock_lock);
608 timer_mod_anticipate(timers_state.icount_warp_timer,
609 clock + deadline);
610 }
611 } else if (deadline == 0) {
612 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
613 }
614 }
615
616 static void qemu_account_warp_timer(void)
617 {
618 if (!use_icount || !icount_sleep) {
619 return;
620 }
621
622 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
623 * do not fire, so computing the deadline does not make sense.
624 */
625 if (!runstate_is_running()) {
626 return;
627 }
628
629 /* warp clock deterministically in record/replay mode */
630 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
631 return;
632 }
633
634 timer_del(timers_state.icount_warp_timer);
635 icount_warp_rt();
636 }
637
638 static bool icount_state_needed(void *opaque)
639 {
640 return use_icount;
641 }
642
643 static bool warp_timer_state_needed(void *opaque)
644 {
645 TimersState *s = opaque;
646 return s->icount_warp_timer != NULL;
647 }
648
649 static bool adjust_timers_state_needed(void *opaque)
650 {
651 TimersState *s = opaque;
652 return s->icount_rt_timer != NULL;
653 }
654
655 static bool shift_state_needed(void *opaque)
656 {
657 return use_icount == 2;
658 }
659
660 /*
661 * Subsection for warp timer migration is optional, because may not be created
662 */
663 static const VMStateDescription icount_vmstate_warp_timer = {
664 .name = "timer/icount/warp_timer",
665 .version_id = 1,
666 .minimum_version_id = 1,
667 .needed = warp_timer_state_needed,
668 .fields = (VMStateField[]) {
669 VMSTATE_INT64(vm_clock_warp_start, TimersState),
670 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
671 VMSTATE_END_OF_LIST()
672 }
673 };
674
675 static const VMStateDescription icount_vmstate_adjust_timers = {
676 .name = "timer/icount/timers",
677 .version_id = 1,
678 .minimum_version_id = 1,
679 .needed = adjust_timers_state_needed,
680 .fields = (VMStateField[]) {
681 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
682 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
683 VMSTATE_END_OF_LIST()
684 }
685 };
686
687 static const VMStateDescription icount_vmstate_shift = {
688 .name = "timer/icount/shift",
689 .version_id = 1,
690 .minimum_version_id = 1,
691 .needed = shift_state_needed,
692 .fields = (VMStateField[]) {
693 VMSTATE_INT16(icount_time_shift, TimersState),
694 VMSTATE_END_OF_LIST()
695 }
696 };
697
698 /*
699 * This is a subsection for icount migration.
700 */
701 static const VMStateDescription icount_vmstate_timers = {
702 .name = "timer/icount",
703 .version_id = 1,
704 .minimum_version_id = 1,
705 .needed = icount_state_needed,
706 .fields = (VMStateField[]) {
707 VMSTATE_INT64(qemu_icount_bias, TimersState),
708 VMSTATE_INT64(qemu_icount, TimersState),
709 VMSTATE_END_OF_LIST()
710 },
711 .subsections = (const VMStateDescription*[]) {
712 &icount_vmstate_warp_timer,
713 &icount_vmstate_adjust_timers,
714 &icount_vmstate_shift,
715 NULL
716 }
717 };
718
719 static const VMStateDescription vmstate_timers = {
720 .name = "timer",
721 .version_id = 2,
722 .minimum_version_id = 1,
723 .fields = (VMStateField[]) {
724 VMSTATE_INT64(cpu_ticks_offset, TimersState),
725 VMSTATE_UNUSED(8),
726 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
727 VMSTATE_END_OF_LIST()
728 },
729 .subsections = (const VMStateDescription*[]) {
730 &icount_vmstate_timers,
731 NULL
732 }
733 };
734
735 void cpu_ticks_init(void)
736 {
737 seqlock_init(&timers_state.vm_clock_seqlock);
738 qemu_spin_init(&timers_state.vm_clock_lock);
739 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
740 cpu_throttle_init();
741 }
742
743 void configure_icount(QemuOpts *opts, Error **errp)
744 {
745 const char *option = qemu_opt_get(opts, "shift");
746 bool sleep = qemu_opt_get_bool(opts, "sleep", true);
747 bool align = qemu_opt_get_bool(opts, "align", false);
748 long time_shift = -1;
749
750 if (!option) {
751 if (qemu_opt_get(opts, "align") != NULL) {
752 error_setg(errp, "Please specify shift option when using align");
753 }
754 return;
755 }
756
757 if (align && !sleep) {
758 error_setg(errp, "align=on and sleep=off are incompatible");
759 return;
760 }
761
762 if (strcmp(option, "auto") != 0) {
763 if (qemu_strtol(option, NULL, 0, &time_shift) < 0
764 || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
765 error_setg(errp, "icount: Invalid shift value");
766 return;
767 }
768 } else if (icount_align_option) {
769 error_setg(errp, "shift=auto and align=on are incompatible");
770 return;
771 } else if (!icount_sleep) {
772 error_setg(errp, "shift=auto and sleep=off are incompatible");
773 return;
774 }
775
776 icount_sleep = sleep;
777 if (icount_sleep) {
778 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
779 icount_timer_cb, NULL);
780 }
781
782 icount_align_option = align;
783
784 if (time_shift >= 0) {
785 timers_state.icount_time_shift = time_shift;
786 use_icount = 1;
787 return;
788 }
789
790 use_icount = 2;
791
792 /* 125MIPS seems a reasonable initial guess at the guest speed.
793 It will be corrected fairly quickly anyway. */
794 timers_state.icount_time_shift = 3;
795
796 /* Have both realtime and virtual time triggers for speed adjustment.
797 The realtime trigger catches emulated time passing too slowly,
798 the virtual time trigger catches emulated time passing too fast.
799 Realtime triggers occur even when idle, so use them less frequently
800 than VM triggers. */
801 timers_state.vm_clock_warp_start = -1;
802 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
803 icount_adjust_rt, NULL);
804 timer_mod(timers_state.icount_rt_timer,
805 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
806 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
807 icount_adjust_vm, NULL);
808 timer_mod(timers_state.icount_vm_timer,
809 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
810 NANOSECONDS_PER_SECOND / 10);
811 }
812
813 /***********************************************************/
814 /* TCG vCPU kick timer
815 *
816 * The kick timer is responsible for moving single threaded vCPU
817 * emulation on to the next vCPU. If more than one vCPU is running a
818 * timer event with force a cpu->exit so the next vCPU can get
819 * scheduled.
820 *
821 * The timer is removed if all vCPUs are idle and restarted again once
822 * idleness is complete.
823 */
824
825 static QEMUTimer *tcg_kick_vcpu_timer;
826 static CPUState *tcg_current_rr_cpu;
827
828 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
829
830 static inline int64_t qemu_tcg_next_kick(void)
831 {
832 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
833 }
834
835 /* Kick the currently round-robin scheduled vCPU to next */
836 static void qemu_cpu_kick_rr_next_cpu(void)
837 {
838 CPUState *cpu;
839 do {
840 cpu = atomic_mb_read(&tcg_current_rr_cpu);
841 if (cpu) {
842 cpu_exit(cpu);
843 }
844 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
845 }
846
847 /* Kick all RR vCPUs */
848 static void qemu_cpu_kick_rr_cpus(void)
849 {
850 CPUState *cpu;
851
852 CPU_FOREACH(cpu) {
853 cpu_exit(cpu);
854 };
855 }
856
857 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
858 {
859 }
860
861 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
862 {
863 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
864 qemu_notify_event();
865 return;
866 }
867
868 if (qemu_in_vcpu_thread()) {
869 /* A CPU is currently running; kick it back out to the
870 * tcg_cpu_exec() loop so it will recalculate its
871 * icount deadline immediately.
872 */
873 qemu_cpu_kick(current_cpu);
874 } else if (first_cpu) {
875 /* qemu_cpu_kick is not enough to kick a halted CPU out of
876 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
877 * causes cpu_thread_is_idle to return false. This way,
878 * handle_icount_deadline can run.
879 * If we have no CPUs at all for some reason, we don't
880 * need to do anything.
881 */
882 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
883 }
884 }
885
886 static void kick_tcg_thread(void *opaque)
887 {
888 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
889 qemu_cpu_kick_rr_next_cpu();
890 }
891
892 static void start_tcg_kick_timer(void)
893 {
894 assert(!mttcg_enabled);
895 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
896 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
897 kick_tcg_thread, NULL);
898 }
899 if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
900 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
901 }
902 }
903
904 static void stop_tcg_kick_timer(void)
905 {
906 assert(!mttcg_enabled);
907 if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
908 timer_del(tcg_kick_vcpu_timer);
909 }
910 }
911
912 /***********************************************************/
913 void hw_error(const char *fmt, ...)
914 {
915 va_list ap;
916 CPUState *cpu;
917
918 va_start(ap, fmt);
919 fprintf(stderr, "qemu: hardware error: ");
920 vfprintf(stderr, fmt, ap);
921 fprintf(stderr, "\n");
922 CPU_FOREACH(cpu) {
923 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
924 cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
925 }
926 va_end(ap);
927 abort();
928 }
929
930 void cpu_synchronize_all_states(void)
931 {
932 CPUState *cpu;
933
934 CPU_FOREACH(cpu) {
935 cpu_synchronize_state(cpu);
936 }
937 }
938
939 void cpu_synchronize_all_post_reset(void)
940 {
941 CPUState *cpu;
942
943 CPU_FOREACH(cpu) {
944 cpu_synchronize_post_reset(cpu);
945 }
946 }
947
948 void cpu_synchronize_all_post_init(void)
949 {
950 CPUState *cpu;
951
952 CPU_FOREACH(cpu) {
953 cpu_synchronize_post_init(cpu);
954 }
955 }
956
957 void cpu_synchronize_all_pre_loadvm(void)
958 {
959 CPUState *cpu;
960
961 CPU_FOREACH(cpu) {
962 cpu_synchronize_pre_loadvm(cpu);
963 }
964 }
965
966 static int do_vm_stop(RunState state, bool send_stop)
967 {
968 int ret = 0;
969
970 if (runstate_is_running()) {
971 runstate_set(state);
972 cpu_disable_ticks();
973 pause_all_vcpus();
974 vm_state_notify(0, state);
975 if (send_stop) {
976 qapi_event_send_stop();
977 }
978 }
979
980 bdrv_drain_all();
981 ret = bdrv_flush_all();
982
983 return ret;
984 }
985
986 /* Special vm_stop() variant for terminating the process. Historically clients
987 * did not expect a QMP STOP event and so we need to retain compatibility.
988 */
989 int vm_shutdown(void)
990 {
991 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
992 }
993
994 static bool cpu_can_run(CPUState *cpu)
995 {
996 if (cpu->stop) {
997 return false;
998 }
999 if (cpu_is_stopped(cpu)) {
1000 return false;
1001 }
1002 return true;
1003 }
1004
1005 static void cpu_handle_guest_debug(CPUState *cpu)
1006 {
1007 gdb_set_stop_cpu(cpu);
1008 qemu_system_debug_request();
1009 cpu->stopped = true;
1010 }
1011
1012 #ifdef CONFIG_LINUX
1013 static void sigbus_reraise(void)
1014 {
1015 sigset_t set;
1016 struct sigaction action;
1017
1018 memset(&action, 0, sizeof(action));
1019 action.sa_handler = SIG_DFL;
1020 if (!sigaction(SIGBUS, &action, NULL)) {
1021 raise(SIGBUS);
1022 sigemptyset(&set);
1023 sigaddset(&set, SIGBUS);
1024 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1025 }
1026 perror("Failed to re-raise SIGBUS!\n");
1027 abort();
1028 }
1029
1030 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1031 {
1032 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1033 sigbus_reraise();
1034 }
1035
1036 if (current_cpu) {
1037 /* Called asynchronously in VCPU thread. */
1038 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1039 sigbus_reraise();
1040 }
1041 } else {
1042 /* Called synchronously (via signalfd) in main thread. */
1043 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1044 sigbus_reraise();
1045 }
1046 }
1047 }
1048
1049 static void qemu_init_sigbus(void)
1050 {
1051 struct sigaction action;
1052
1053 memset(&action, 0, sizeof(action));
1054 action.sa_flags = SA_SIGINFO;
1055 action.sa_sigaction = sigbus_handler;
1056 sigaction(SIGBUS, &action, NULL);
1057
1058 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1059 }
1060 #else /* !CONFIG_LINUX */
/* Without CONFIG_LINUX there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
    /* Intentionally empty. */
}
1064 #endif /* !CONFIG_LINUX */
1065
1066 static QemuThread io_thread;
1067
1068 /* cpu creation */
1069 static QemuCond qemu_cpu_cond;
1070 /* system init */
1071 static QemuCond qemu_pause_cond;
1072
1073 void qemu_init_cpu_loop(void)
1074 {
1075 qemu_init_sigbus();
1076 qemu_cond_init(&qemu_cpu_cond);
1077 qemu_cond_init(&qemu_pause_cond);
1078 qemu_mutex_init(&qemu_global_mutex);
1079
1080 qemu_thread_get_self(&io_thread);
1081 }
1082
1083 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1084 {
1085 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1086 }
1087
1088 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1089 {
1090 if (kvm_destroy_vcpu(cpu) < 0) {
1091 error_report("kvm_destroy_vcpu failed");
1092 exit(EXIT_FAILURE);
1093 }
1094 }
1095
1096 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1097 {
1098 }
1099
1100 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1101 {
1102 g_assert(qemu_cpu_is_self(cpu));
1103 cpu->stop = false;
1104 cpu->stopped = true;
1105 if (exit) {
1106 cpu_exit(cpu);
1107 }
1108 qemu_cond_broadcast(&qemu_pause_cond);
1109 }
1110
1111 static void qemu_wait_io_event_common(CPUState *cpu)
1112 {
1113 atomic_mb_set(&cpu->thread_kicked, false);
1114 if (cpu->stop) {
1115 qemu_cpu_stop(cpu, false);
1116 }
1117 process_queued_cpu_work(cpu);
1118 }
1119
1120 static void qemu_tcg_rr_wait_io_event(void)
1121 {
1122 CPUState *cpu;
1123
1124 while (all_cpu_threads_idle()) {
1125 stop_tcg_kick_timer();
1126 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1127 }
1128
1129 start_tcg_kick_timer();
1130
1131 CPU_FOREACH(cpu) {
1132 qemu_wait_io_event_common(cpu);
1133 }
1134 }
1135
1136 static void qemu_wait_io_event(CPUState *cpu)
1137 {
1138 bool slept = false;
1139
1140 while (cpu_thread_is_idle(cpu)) {
1141 if (!slept) {
1142 slept = true;
1143 qemu_plugin_vcpu_idle_cb(cpu);
1144 }
1145 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1146 }
1147 if (slept) {
1148 qemu_plugin_vcpu_resume_cb(cpu);
1149 }
1150
1151 #ifdef _WIN32
1152 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1153 if (!tcg_enabled()) {
1154 SleepEx(0, TRUE);
1155 }
1156 #endif
1157 qemu_wait_io_event_common(cpu);
1158 }
1159
1160 static void *qemu_kvm_cpu_thread_fn(void *arg)
1161 {
1162 CPUState *cpu = arg;
1163 int r;
1164
1165 rcu_register_thread();
1166
1167 qemu_mutex_lock_iothread();
1168 qemu_thread_get_self(cpu->thread);
1169 cpu->thread_id = qemu_get_thread_id();
1170 cpu->can_do_io = 1;
1171 current_cpu = cpu;
1172
1173 r = kvm_init_vcpu(cpu);
1174 if (r < 0) {
1175 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1176 exit(1);
1177 }
1178
1179 kvm_init_cpu_signals(cpu);
1180
1181 /* signal CPU creation */
1182 cpu->created = true;
1183 qemu_cond_signal(&qemu_cpu_cond);
1184 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1185
1186 do {
1187 if (cpu_can_run(cpu)) {
1188 r = kvm_cpu_exec(cpu);
1189 if (r == EXCP_DEBUG) {
1190 cpu_handle_guest_debug(cpu);
1191 }
1192 }
1193 qemu_wait_io_event(cpu);
1194 } while (!cpu->unplug || cpu_can_run(cpu));
1195
1196 qemu_kvm_destroy_vcpu(cpu);
1197 cpu->created = false;
1198 qemu_cond_signal(&qemu_cpu_cond);
1199 qemu_mutex_unlock_iothread();
1200 rcu_unregister_thread();
1201 return NULL;
1202 }
1203
1204 static void *qemu_dummy_cpu_thread_fn(void *arg)
1205 {
1206 #ifdef _WIN32
1207 error_report("qtest is not supported under Windows");
1208 exit(1);
1209 #else
1210 CPUState *cpu = arg;
1211 sigset_t waitset;
1212 int r;
1213
1214 rcu_register_thread();
1215
1216 qemu_mutex_lock_iothread();
1217 qemu_thread_get_self(cpu->thread);
1218 cpu->thread_id = qemu_get_thread_id();
1219 cpu->can_do_io = 1;
1220 current_cpu = cpu;
1221
1222 sigemptyset(&waitset);
1223 sigaddset(&waitset, SIG_IPI);
1224
1225 /* signal CPU creation */
1226 cpu->created = true;
1227 qemu_cond_signal(&qemu_cpu_cond);
1228 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1229
1230 do {
1231 qemu_mutex_unlock_iothread();
1232 do {
1233 int sig;
1234 r = sigwait(&waitset, &sig);
1235 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1236 if (r == -1) {
1237 perror("sigwait");
1238 exit(1);
1239 }
1240 qemu_mutex_lock_iothread();
1241 qemu_wait_io_event(cpu);
1242 } while (!cpu->unplug);
1243
1244 qemu_mutex_unlock_iothread();
1245 rcu_unregister_thread();
1246 return NULL;
1247 #endif
1248 }
1249
1250 static int64_t tcg_get_icount_limit(void)
1251 {
1252 int64_t deadline;
1253
1254 if (replay_mode != REPLAY_MODE_PLAY) {
1255 /*
1256 * Include all the timers, because they may need an attention.
1257 * Too long CPU execution may create unnecessary delay in UI.
1258 */
1259 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1260 QEMU_TIMER_ATTR_ALL);
1261 /* Check realtime timers, because they help with input processing */
1262 deadline = qemu_soonest_timeout(deadline,
1263 qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
1264 QEMU_TIMER_ATTR_ALL));
1265
1266 /* Maintain prior (possibly buggy) behaviour where if no deadline
1267 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1268 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1269 * nanoseconds.
1270 */
1271 if ((deadline < 0) || (deadline > INT32_MAX)) {
1272 deadline = INT32_MAX;
1273 }
1274
1275 return qemu_icount_round(deadline);
1276 } else {
1277 return replay_get_instructions();
1278 }
1279 }
1280
1281 static void notify_aio_contexts(void)
1282 {
1283 /* Wake up other AioContexts. */
1284 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1285 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1286 }
1287
1288 static void handle_icount_deadline(void)
1289 {
1290 assert(qemu_in_vcpu_thread());
1291 if (use_icount) {
1292 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1293 QEMU_TIMER_ATTR_ALL);
1294
1295 if (deadline == 0) {
1296 notify_aio_contexts();
1297 }
1298 }
1299 }
1300
1301 static void prepare_icount_for_run(CPUState *cpu)
1302 {
1303 if (use_icount) {
1304 int insns_left;
1305
1306 /* These should always be cleared by process_icount_data after
1307 * each vCPU execution. However u16.high can be raised
1308 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1309 */
1310 g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
1311 g_assert(cpu->icount_extra == 0);
1312
1313 cpu->icount_budget = tcg_get_icount_limit();
1314 insns_left = MIN(0xffff, cpu->icount_budget);
1315 cpu_neg(cpu)->icount_decr.u16.low = insns_left;
1316 cpu->icount_extra = cpu->icount_budget - insns_left;
1317
1318 replay_mutex_lock();
1319
1320 if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
1321 notify_aio_contexts();
1322 }
1323 }
1324 }
1325
1326 static void process_icount_data(CPUState *cpu)
1327 {
1328 if (use_icount) {
1329 /* Account for executed instructions */
1330 cpu_update_icount(cpu);
1331
1332 /* Reset the counters */
1333 cpu_neg(cpu)->icount_decr.u16.low = 0;
1334 cpu->icount_extra = 0;
1335 cpu->icount_budget = 0;
1336
1337 replay_account_executed_instructions();
1338
1339 replay_mutex_unlock();
1340 }
1341 }
1342
1343
1344 static int tcg_cpu_exec(CPUState *cpu)
1345 {
1346 int ret;
1347 #ifdef CONFIG_PROFILER
1348 int64_t ti;
1349 #endif
1350
1351 assert(tcg_enabled());
1352 #ifdef CONFIG_PROFILER
1353 ti = profile_getclock();
1354 #endif
1355 cpu_exec_start(cpu);
1356 ret = cpu_exec(cpu);
1357 cpu_exec_end(cpu);
1358 #ifdef CONFIG_PROFILER
1359 atomic_set(&tcg_ctx->prof.cpu_exec_time,
1360 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
1361 #endif
1362 return ret;
1363 }
1364
1365 /* Destroy any remaining vCPUs which have been unplugged and have
1366 * finished running
1367 */
1368 static void deal_with_unplugged_cpus(void)
1369 {
1370 CPUState *cpu;
1371
1372 CPU_FOREACH(cpu) {
1373 if (cpu->unplug && !cpu_can_run(cpu)) {
1374 qemu_tcg_destroy_vcpu(cpu);
1375 cpu->created = false;
1376 qemu_cond_signal(&qemu_cpu_cond);
1377 break;
1378 }
1379 }
1380 }
1381
1382 /* Single-threaded TCG
1383 *
1384 * In the single-threaded case each vCPU is simulated in turn. If
1385 * there is more than a single vCPU we create a simple timer to kick
1386 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1387 * This is done explicitly rather than relying on side-effects
1388 * elsewhere.
1389 */
1390
1391 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1392 {
1393 CPUState *cpu = arg;
1394
1395 assert(tcg_enabled());
1396 rcu_register_thread();
1397 tcg_register_thread();
1398
1399 qemu_mutex_lock_iothread();
1400 qemu_thread_get_self(cpu->thread);
1401
1402 cpu->thread_id = qemu_get_thread_id();
1403 cpu->created = true;
1404 cpu->can_do_io = 1;
1405 qemu_cond_signal(&qemu_cpu_cond);
1406 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1407
1408 /* wait for initial kick-off after machine start */
1409 while (first_cpu->stopped) {
1410 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1411
1412 /* process any pending work */
1413 CPU_FOREACH(cpu) {
1414 current_cpu = cpu;
1415 qemu_wait_io_event_common(cpu);
1416 }
1417 }
1418
1419 start_tcg_kick_timer();
1420
1421 cpu = first_cpu;
1422
1423 /* process any pending work */
1424 cpu->exit_request = 1;
1425
1426 while (1) {
1427 qemu_mutex_unlock_iothread();
1428 replay_mutex_lock();
1429 qemu_mutex_lock_iothread();
1430 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1431 qemu_account_warp_timer();
1432
1433 /* Run the timers here. This is much more efficient than
1434 * waking up the I/O thread and waiting for completion.
1435 */
1436 handle_icount_deadline();
1437
1438 replay_mutex_unlock();
1439
1440 if (!cpu) {
1441 cpu = first_cpu;
1442 }
1443
1444 while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
1445
1446 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1447 current_cpu = cpu;
1448
1449 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1450 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1451
1452 if (cpu_can_run(cpu)) {
1453 int r;
1454
1455 qemu_mutex_unlock_iothread();
1456 prepare_icount_for_run(cpu);
1457
1458 r = tcg_cpu_exec(cpu);
1459
1460 process_icount_data(cpu);
1461 qemu_mutex_lock_iothread();
1462
1463 if (r == EXCP_DEBUG) {
1464 cpu_handle_guest_debug(cpu);
1465 break;
1466 } else if (r == EXCP_ATOMIC) {
1467 qemu_mutex_unlock_iothread();
1468 cpu_exec_step_atomic(cpu);
1469 qemu_mutex_lock_iothread();
1470 break;
1471 }
1472 } else if (cpu->stop) {
1473 if (cpu->unplug) {
1474 cpu = CPU_NEXT(cpu);
1475 }
1476 break;
1477 }
1478
1479 cpu = CPU_NEXT(cpu);
1480 } /* while (cpu && !cpu->exit_request).. */
1481
1482 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1483 atomic_set(&tcg_current_rr_cpu, NULL);
1484
1485 if (cpu && cpu->exit_request) {
1486 atomic_mb_set(&cpu->exit_request, 0);
1487 }
1488
1489 if (use_icount && all_cpu_threads_idle()) {
1490 /*
1491 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1492 * in the main_loop, wake it up in order to start the warp timer.
1493 */
1494 qemu_notify_event();
1495 }
1496
1497 qemu_tcg_rr_wait_io_event();
1498 deal_with_unplugged_cpus();
1499 }
1500
1501 rcu_unregister_thread();
1502 return NULL;
1503 }
1504
1505 static void *qemu_hax_cpu_thread_fn(void *arg)
1506 {
1507 CPUState *cpu = arg;
1508 int r;
1509
1510 rcu_register_thread();
1511 qemu_mutex_lock_iothread();
1512 qemu_thread_get_self(cpu->thread);
1513
1514 cpu->thread_id = qemu_get_thread_id();
1515 cpu->created = true;
1516 current_cpu = cpu;
1517
1518 hax_init_vcpu(cpu);
1519 qemu_cond_signal(&qemu_cpu_cond);
1520 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1521
1522 do {
1523 if (cpu_can_run(cpu)) {
1524 r = hax_smp_cpu_exec(cpu);
1525 if (r == EXCP_DEBUG) {
1526 cpu_handle_guest_debug(cpu);
1527 }
1528 }
1529
1530 qemu_wait_io_event(cpu);
1531 } while (!cpu->unplug || cpu_can_run(cpu));
1532 rcu_unregister_thread();
1533 return NULL;
1534 }
1535
1536 /* The HVF-specific vCPU thread function. This one should only run when the host
1537 * CPU supports the VMX "unrestricted guest" feature. */
1538 static void *qemu_hvf_cpu_thread_fn(void *arg)
1539 {
1540 CPUState *cpu = arg;
1541
1542 int r;
1543
1544 assert(hvf_enabled());
1545
1546 rcu_register_thread();
1547
1548 qemu_mutex_lock_iothread();
1549 qemu_thread_get_self(cpu->thread);
1550
1551 cpu->thread_id = qemu_get_thread_id();
1552 cpu->can_do_io = 1;
1553 current_cpu = cpu;
1554
1555 hvf_init_vcpu(cpu);
1556
1557 /* signal CPU creation */
1558 cpu->created = true;
1559 qemu_cond_signal(&qemu_cpu_cond);
1560 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1561
1562 do {
1563 if (cpu_can_run(cpu)) {
1564 r = hvf_vcpu_exec(cpu);
1565 if (r == EXCP_DEBUG) {
1566 cpu_handle_guest_debug(cpu);
1567 }
1568 }
1569 qemu_wait_io_event(cpu);
1570 } while (!cpu->unplug || cpu_can_run(cpu));
1571
1572 hvf_vcpu_destroy(cpu);
1573 cpu->created = false;
1574 qemu_cond_signal(&qemu_cpu_cond);
1575 qemu_mutex_unlock_iothread();
1576 rcu_unregister_thread();
1577 return NULL;
1578 }
1579
1580 static void *qemu_whpx_cpu_thread_fn(void *arg)
1581 {
1582 CPUState *cpu = arg;
1583 int r;
1584
1585 rcu_register_thread();
1586
1587 qemu_mutex_lock_iothread();
1588 qemu_thread_get_self(cpu->thread);
1589 cpu->thread_id = qemu_get_thread_id();
1590 current_cpu = cpu;
1591
1592 r = whpx_init_vcpu(cpu);
1593 if (r < 0) {
1594 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1595 exit(1);
1596 }
1597
1598 /* signal CPU creation */
1599 cpu->created = true;
1600 qemu_cond_signal(&qemu_cpu_cond);
1601 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1602
1603 do {
1604 if (cpu_can_run(cpu)) {
1605 r = whpx_vcpu_exec(cpu);
1606 if (r == EXCP_DEBUG) {
1607 cpu_handle_guest_debug(cpu);
1608 }
1609 }
1610 while (cpu_thread_is_idle(cpu)) {
1611 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1612 }
1613 qemu_wait_io_event_common(cpu);
1614 } while (!cpu->unplug || cpu_can_run(cpu));
1615
1616 whpx_destroy_vcpu(cpu);
1617 cpu->created = false;
1618 qemu_cond_signal(&qemu_cpu_cond);
1619 qemu_mutex_unlock_iothread();
1620 rcu_unregister_thread();
1621 return NULL;
1622 }
1623
#ifdef _WIN32
/*
 * Empty APC routine.  qemu_cpu_kick_thread() queues it on a vCPU thread
 * with QueueUserAPC(); the routine itself deliberately does nothing.
 */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
    /* intentionally empty */
}
#endif
1629
1630 /* Multi-threaded TCG
1631 *
1632 * In the multi-threaded case each vCPU has its own thread. The TLS
1633 * variable current_cpu can be used deep in the code to find the
1634 * current CPUState for a given thread.
1635 */
1636
1637 static void *qemu_tcg_cpu_thread_fn(void *arg)
1638 {
1639 CPUState *cpu = arg;
1640
1641 assert(tcg_enabled());
1642 g_assert(!use_icount);
1643
1644 rcu_register_thread();
1645 tcg_register_thread();
1646
1647 qemu_mutex_lock_iothread();
1648 qemu_thread_get_self(cpu->thread);
1649
1650 cpu->thread_id = qemu_get_thread_id();
1651 cpu->created = true;
1652 cpu->can_do_io = 1;
1653 current_cpu = cpu;
1654 qemu_cond_signal(&qemu_cpu_cond);
1655 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1656
1657 /* process any pending work */
1658 cpu->exit_request = 1;
1659
1660 do {
1661 if (cpu_can_run(cpu)) {
1662 int r;
1663 qemu_mutex_unlock_iothread();
1664 r = tcg_cpu_exec(cpu);
1665 qemu_mutex_lock_iothread();
1666 switch (r) {
1667 case EXCP_DEBUG:
1668 cpu_handle_guest_debug(cpu);
1669 break;
1670 case EXCP_HALTED:
1671 /* during start-up the vCPU is reset and the thread is
1672 * kicked several times. If we don't ensure we go back
1673 * to sleep in the halted state we won't cleanly
1674 * start-up when the vCPU is enabled.
1675 *
1676 * cpu->halted should ensure we sleep in wait_io_event
1677 */
1678 g_assert(cpu->halted);
1679 break;
1680 case EXCP_ATOMIC:
1681 qemu_mutex_unlock_iothread();
1682 cpu_exec_step_atomic(cpu);
1683 qemu_mutex_lock_iothread();
1684 default:
1685 /* Ignore everything else? */
1686 break;
1687 }
1688 }
1689
1690 atomic_mb_set(&cpu->exit_request, 0);
1691 qemu_wait_io_event(cpu);
1692 } while (!cpu->unplug || cpu_can_run(cpu));
1693
1694 qemu_tcg_destroy_vcpu(cpu);
1695 cpu->created = false;
1696 qemu_cond_signal(&qemu_cpu_cond);
1697 qemu_mutex_unlock_iothread();
1698 rcu_unregister_thread();
1699 return NULL;
1700 }
1701
1702 static void qemu_cpu_kick_thread(CPUState *cpu)
1703 {
1704 #ifndef _WIN32
1705 int err;
1706
1707 if (cpu->thread_kicked) {
1708 return;
1709 }
1710 cpu->thread_kicked = true;
1711 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1712 if (err && err != ESRCH) {
1713 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1714 exit(1);
1715 }
1716 #else /* _WIN32 */
1717 if (!qemu_cpu_is_self(cpu)) {
1718 if (whpx_enabled()) {
1719 whpx_vcpu_kick(cpu);
1720 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1721 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1722 __func__, GetLastError());
1723 exit(1);
1724 }
1725 }
1726 #endif
1727 }
1728
1729 void qemu_cpu_kick(CPUState *cpu)
1730 {
1731 qemu_cond_broadcast(cpu->halt_cond);
1732 if (tcg_enabled()) {
1733 if (qemu_tcg_mttcg_enabled()) {
1734 cpu_exit(cpu);
1735 } else {
1736 qemu_cpu_kick_rr_cpus();
1737 }
1738 } else {
1739 if (hax_enabled()) {
1740 /*
1741 * FIXME: race condition with the exit_request check in
1742 * hax_vcpu_hax_exec
1743 */
1744 cpu->exit_request = 1;
1745 }
1746 qemu_cpu_kick_thread(cpu);
1747 }
1748 }
1749
1750 void qemu_cpu_kick_self(void)
1751 {
1752 assert(current_cpu);
1753 qemu_cpu_kick_thread(current_cpu);
1754 }
1755
1756 bool qemu_cpu_is_self(CPUState *cpu)
1757 {
1758 return qemu_thread_is_self(cpu->thread);
1759 }
1760
1761 bool qemu_in_vcpu_thread(void)
1762 {
1763 return current_cpu && qemu_cpu_is_self(current_cpu);
1764 }
1765
/*
 * Per-thread flag: true between qemu_mutex_lock_iothread_impl() and
 * qemu_mutex_unlock_iothread() on the same thread.
 */
static __thread bool iothread_locked = false;

/* Report whether the calling thread has the iothread lock flag set. */
bool qemu_mutex_iothread_locked(void)
{
    bool held = iothread_locked;

    return held;
}
1772
1773 /*
1774 * The BQL is taken from so many places that it is worth profiling the
1775 * callers directly, instead of funneling them all through a single function.
1776 */
1777 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1778 {
1779 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1780
1781 g_assert(!qemu_mutex_iothread_locked());
1782 bql_lock(&qemu_global_mutex, file, line);
1783 iothread_locked = true;
1784 }
1785
1786 void qemu_mutex_unlock_iothread(void)
1787 {
1788 g_assert(qemu_mutex_iothread_locked());
1789 iothread_locked = false;
1790 qemu_mutex_unlock(&qemu_global_mutex);
1791 }
1792
1793 void qemu_cond_wait_iothread(QemuCond *cond)
1794 {
1795 qemu_cond_wait(cond, &qemu_global_mutex);
1796 }
1797
1798 void qemu_cond_timedwait_iothread(QemuCond *cond, int ms)
1799 {
1800 qemu_cond_timedwait(cond, &qemu_global_mutex, ms);
1801 }
1802
1803 static bool all_vcpus_paused(void)
1804 {
1805 CPUState *cpu;
1806
1807 CPU_FOREACH(cpu) {
1808 if (!cpu->stopped) {
1809 return false;
1810 }
1811 }
1812
1813 return true;
1814 }
1815
1816 void pause_all_vcpus(void)
1817 {
1818 CPUState *cpu;
1819
1820 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1821 CPU_FOREACH(cpu) {
1822 if (qemu_cpu_is_self(cpu)) {
1823 qemu_cpu_stop(cpu, true);
1824 } else {
1825 cpu->stop = true;
1826 qemu_cpu_kick(cpu);
1827 }
1828 }
1829
1830 /* We need to drop the replay_lock so any vCPU threads woken up
1831 * can finish their replay tasks
1832 */
1833 replay_mutex_unlock();
1834
1835 while (!all_vcpus_paused()) {
1836 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1837 CPU_FOREACH(cpu) {
1838 qemu_cpu_kick(cpu);
1839 }
1840 }
1841
1842 qemu_mutex_unlock_iothread();
1843 replay_mutex_lock();
1844 qemu_mutex_lock_iothread();
1845 }
1846
1847 void cpu_resume(CPUState *cpu)
1848 {
1849 cpu->stop = false;
1850 cpu->stopped = false;
1851 qemu_cpu_kick(cpu);
1852 }
1853
1854 void resume_all_vcpus(void)
1855 {
1856 CPUState *cpu;
1857
1858 if (!runstate_is_running()) {
1859 return;
1860 }
1861
1862 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1863 CPU_FOREACH(cpu) {
1864 cpu_resume(cpu);
1865 }
1866 }
1867
1868 void cpu_remove_sync(CPUState *cpu)
1869 {
1870 cpu->stop = true;
1871 cpu->unplug = true;
1872 qemu_cpu_kick(cpu);
1873 qemu_mutex_unlock_iothread();
1874 qemu_thread_join(cpu->thread);
1875 qemu_mutex_lock_iothread();
1876 }
1877
1878 /* For temporary buffers for forming a name */
1879 #define VCPU_THREAD_NAME_SIZE 16
1880
1881 static void qemu_tcg_init_vcpu(CPUState *cpu)
1882 {
1883 char thread_name[VCPU_THREAD_NAME_SIZE];
1884 static QemuCond *single_tcg_halt_cond;
1885 static QemuThread *single_tcg_cpu_thread;
1886 static int tcg_region_inited;
1887
1888 assert(tcg_enabled());
1889 /*
1890 * Initialize TCG regions--once. Now is a good time, because:
1891 * (1) TCG's init context, prologue and target globals have been set up.
1892 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1893 * -accel flag is processed, so the check doesn't work then).
1894 */
1895 if (!tcg_region_inited) {
1896 tcg_region_inited = 1;
1897 tcg_region_init();
1898 }
1899
1900 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1901 cpu->thread = g_malloc0(sizeof(QemuThread));
1902 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1903 qemu_cond_init(cpu->halt_cond);
1904
1905 if (qemu_tcg_mttcg_enabled()) {
1906 /* create a thread per vCPU with TCG (MTTCG) */
1907 parallel_cpus = true;
1908 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1909 cpu->cpu_index);
1910
1911 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1912 cpu, QEMU_THREAD_JOINABLE);
1913
1914 } else {
1915 /* share a single thread for all cpus with TCG */
1916 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1917 qemu_thread_create(cpu->thread, thread_name,
1918 qemu_tcg_rr_cpu_thread_fn,
1919 cpu, QEMU_THREAD_JOINABLE);
1920
1921 single_tcg_halt_cond = cpu->halt_cond;
1922 single_tcg_cpu_thread = cpu->thread;
1923 }
1924 #ifdef _WIN32
1925 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1926 #endif
1927 } else {
1928 /* For non-MTTCG cases we share the thread */
1929 cpu->thread = single_tcg_cpu_thread;
1930 cpu->halt_cond = single_tcg_halt_cond;
1931 cpu->thread_id = first_cpu->thread_id;
1932 cpu->can_do_io = 1;
1933 cpu->created = true;
1934 }
1935 }
1936
1937 static void qemu_hax_start_vcpu(CPUState *cpu)
1938 {
1939 char thread_name[VCPU_THREAD_NAME_SIZE];
1940
1941 cpu->thread = g_malloc0(sizeof(QemuThread));
1942 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1943 qemu_cond_init(cpu->halt_cond);
1944
1945 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1946 cpu->cpu_index);
1947 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1948 cpu, QEMU_THREAD_JOINABLE);
1949 #ifdef _WIN32
1950 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1951 #endif
1952 }
1953
1954 static void qemu_kvm_start_vcpu(CPUState *cpu)
1955 {
1956 char thread_name[VCPU_THREAD_NAME_SIZE];
1957
1958 cpu->thread = g_malloc0(sizeof(QemuThread));
1959 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1960 qemu_cond_init(cpu->halt_cond);
1961 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1962 cpu->cpu_index);
1963 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1964 cpu, QEMU_THREAD_JOINABLE);
1965 }
1966
1967 static void qemu_hvf_start_vcpu(CPUState *cpu)
1968 {
1969 char thread_name[VCPU_THREAD_NAME_SIZE];
1970
1971 /* HVF currently does not support TCG, and only runs in
1972 * unrestricted-guest mode. */
1973 assert(hvf_enabled());
1974
1975 cpu->thread = g_malloc0(sizeof(QemuThread));
1976 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1977 qemu_cond_init(cpu->halt_cond);
1978
1979 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
1980 cpu->cpu_index);
1981 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
1982 cpu, QEMU_THREAD_JOINABLE);
1983 }
1984
1985 static void qemu_whpx_start_vcpu(CPUState *cpu)
1986 {
1987 char thread_name[VCPU_THREAD_NAME_SIZE];
1988
1989 cpu->thread = g_malloc0(sizeof(QemuThread));
1990 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1991 qemu_cond_init(cpu->halt_cond);
1992 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
1993 cpu->cpu_index);
1994 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
1995 cpu, QEMU_THREAD_JOINABLE);
1996 #ifdef _WIN32
1997 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1998 #endif
1999 }
2000
2001 static void qemu_dummy_start_vcpu(CPUState *cpu)
2002 {
2003 char thread_name[VCPU_THREAD_NAME_SIZE];
2004
2005 cpu->thread = g_malloc0(sizeof(QemuThread));
2006 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2007 qemu_cond_init(cpu->halt_cond);
2008 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2009 cpu->cpu_index);
2010 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2011 QEMU_THREAD_JOINABLE);
2012 }
2013
2014 void qemu_init_vcpu(CPUState *cpu)
2015 {
2016 MachineState *ms = MACHINE(qdev_get_machine());
2017
2018 cpu->nr_cores = ms->smp.cores;
2019 cpu->nr_threads = ms->smp.threads;
2020 cpu->stopped = true;
2021 cpu->random_seed = qemu_guest_random_seed_thread_part1();
2022
2023 if (!cpu->as) {
2024 /* If the target cpu hasn't set up any address spaces itself,
2025 * give it the default one.
2026 */
2027 cpu->num_ases = 1;
2028 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2029 }
2030
2031 if (kvm_enabled()) {
2032 qemu_kvm_start_vcpu(cpu);
2033 } else if (hax_enabled()) {
2034 qemu_hax_start_vcpu(cpu);
2035 } else if (hvf_enabled()) {
2036 qemu_hvf_start_vcpu(cpu);
2037 } else if (tcg_enabled()) {
2038 qemu_tcg_init_vcpu(cpu);
2039 } else if (whpx_enabled()) {
2040 qemu_whpx_start_vcpu(cpu);
2041 } else {
2042 qemu_dummy_start_vcpu(cpu);
2043 }
2044
2045 while (!cpu->created) {
2046 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2047 }
2048 }
2049
2050 void cpu_stop_current(void)
2051 {
2052 if (current_cpu) {
2053 current_cpu->stop = true;
2054 cpu_exit(current_cpu);
2055 }
2056 }
2057
2058 int vm_stop(RunState state)
2059 {
2060 if (qemu_in_vcpu_thread()) {
2061 qemu_system_vmstop_request_prepare();
2062 qemu_system_vmstop_request(state);
2063 /*
2064 * FIXME: should not return to device code in case
2065 * vm_stop() has been requested.
2066 */
2067 cpu_stop_current();
2068 return 0;
2069 }
2070
2071 return do_vm_stop(state, true);
2072 }
2073
2074 /**
2075 * Prepare for (re)starting the VM.
2076 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2077 * running or in case of an error condition), 0 otherwise.
2078 */
2079 int vm_prepare_start(void)
2080 {
2081 RunState requested;
2082
2083 qemu_vmstop_requested(&requested);
2084 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2085 return -1;
2086 }
2087
2088 /* Ensure that a STOP/RESUME pair of events is emitted if a
2089 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2090 * example, according to documentation is always followed by
2091 * the STOP event.
2092 */
2093 if (runstate_is_running()) {
2094 qapi_event_send_stop();
2095 qapi_event_send_resume();
2096 return -1;
2097 }
2098
2099 /* We are sending this now, but the CPUs will be resumed shortly later */
2100 qapi_event_send_resume();
2101
2102 cpu_enable_ticks();
2103 runstate_set(RUN_STATE_RUNNING);
2104 vm_state_notify(1, RUN_STATE_RUNNING);
2105 return 0;
2106 }
2107
/*
 * Start (or restart) the VM: resume all vCPUs when vm_prepare_start()
 * returns 0, and do nothing otherwise.
 */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
2114
2115 /* does a state transition even if the VM is already stopped,
2116 current state is forgotten forever */
2117 int vm_stop_force_state(RunState state)
2118 {
2119 if (runstate_is_running()) {
2120 return vm_stop(state);
2121 } else {
2122 runstate_set(state);
2123
2124 bdrv_drain_all();
2125 /* Make sure to return an error if the flush in a previous vm_stop()
2126 * failed. */
2127 return bdrv_flush_all();
2128 }
2129 }
2130
/*
 * Print the list of CPU models through the target's cpu_list() macro, when
 * the target defines one; otherwise do nothing.  @optarg is not used.
 */
void list_cpus(const char *optarg)
{
#if defined(cpu_list)
    cpu_list();
#endif
    /* XXX: implement xxx_cpu_list for targets that still miss it */
}
2138
2139 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2140 bool has_cpu, int64_t cpu_index, Error **errp)
2141 {
2142 FILE *f;
2143 uint32_t l;
2144 CPUState *cpu;
2145 uint8_t buf[1024];
2146 int64_t orig_addr = addr, orig_size = size;
2147
2148 if (!has_cpu) {
2149 cpu_index = 0;
2150 }
2151
2152 cpu = qemu_get_cpu(cpu_index);
2153 if (cpu == NULL) {
2154 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2155 "a CPU number");
2156 return;
2157 }
2158
2159 f = fopen(filename, "wb");
2160 if (!f) {
2161 error_setg_file_open(errp, errno, filename);
2162 return;
2163 }
2164
2165 while (size != 0) {
2166 l = sizeof(buf);
2167 if (l > size)
2168 l = size;
2169 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2170 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2171 " specified", orig_addr, orig_size);
2172 goto exit;
2173 }
2174 if (fwrite(buf, 1, l, f) != l) {
2175 error_setg(errp, QERR_IO_ERROR);
2176 goto exit;
2177 }
2178 addr += l;
2179 size -= l;
2180 }
2181
2182 exit:
2183 fclose(f);
2184 }
2185
2186 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2187 Error **errp)
2188 {
2189 FILE *f;
2190 uint32_t l;
2191 uint8_t buf[1024];
2192
2193 f = fopen(filename, "wb");
2194 if (!f) {
2195 error_setg_file_open(errp, errno, filename);
2196 return;
2197 }
2198
2199 while (size != 0) {
2200 l = sizeof(buf);
2201 if (l > size)
2202 l = size;
2203 cpu_physical_memory_read(addr, buf, l);
2204 if (fwrite(buf, 1, l, f) != l) {
2205 error_setg(errp, QERR_IO_ERROR);
2206 goto exit;
2207 }
2208 addr += l;
2209 size -= l;
2210 }
2211
2212 exit:
2213 fclose(f);
2214 }
2215
2216 void qmp_inject_nmi(Error **errp)
2217 {
2218 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2219 }
2220
2221 void dump_drift_info(void)
2222 {
2223 if (!use_icount) {
2224 return;
2225 }
2226
2227 qemu_printf("Host - Guest clock %"PRIi64" ms\n",
2228 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2229 if (icount_align_option) {
2230 qemu_printf("Max guest delay %"PRIi64" ms\n",
2231 -max_delay / SCALE_MS);
2232 qemu_printf("Max guest advance %"PRIi64" ms\n",
2233 max_advance / SCALE_MS);
2234 } else {
2235 qemu_printf("Max guest delay NA\n");
2236 qemu_printf("Max guest advance NA\n");
2237 }
2238 }