Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu/osdep.h"
26 #include "qemu-common.h"
27 #include "qemu/config-file.h"
28 #include "migration/vmstate.h"
29 #include "monitor/monitor.h"
30 #include "qapi/error.h"
31 #include "qapi/qapi-commands-misc.h"
32 #include "qapi/qapi-events-run-state.h"
33 #include "qapi/qmp/qerror.h"
34 #include "qemu/error-report.h"
35 #include "qemu/qemu-print.h"
36 #include "sysemu/tcg.h"
37 #include "sysemu/block-backend.h"
38 #include "exec/gdbstub.h"
39 #include "sysemu/dma.h"
40 #include "sysemu/hw_accel.h"
41 #include "sysemu/kvm.h"
42 #include "sysemu/hax.h"
43 #include "sysemu/hvf.h"
44 #include "sysemu/whpx.h"
45 #include "exec/exec-all.h"
46
47 #include "qemu/thread.h"
48 #include "qemu/plugin.h"
49 #include "sysemu/cpus.h"
50 #include "sysemu/qtest.h"
51 #include "qemu/main-loop.h"
52 #include "qemu/option.h"
53 #include "qemu/bitmap.h"
54 #include "qemu/seqlock.h"
55 #include "qemu/guest-random.h"
56 #include "tcg/tcg.h"
57 #include "hw/nmi.h"
58 #include "sysemu/replay.h"
59 #include "sysemu/runstate.h"
60 #include "hw/boards.h"
61 #include "hw/hw.h"
62
#ifdef CONFIG_LINUX

#include <sys/prctl.h>

/*
 * Fallback values for the prctl() machine-check constants passed to
 * prctl() in qemu_init_sigbus().  Each one is defined here only when
 * <sys/prctl.h> did not already provide it.
 */
#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */
80
/*
 * Mutex handed to do_run_on_cpu() and to the vCPU condition-variable
 * waits in this file; initialised in qemu_init_cpu_loop().
 */
static QemuMutex qemu_global_mutex;

/* NOTE(review): not referenced in the visible part of this file. */
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
/* Timer whose callback is cpu_throttle_timer_tick(). */
static QEMUTimer *throttle_timer;
/* Current throttle percentage; 0 means throttling is off. */
static unsigned int throttle_percentage;

/* cpu_throttle_set() clamps the requested percentage into [MIN, MAX]. */
#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
/* Base time slice, in nanoseconds, that throttle sleep times are scaled from. */
#define CPU_THROTTLE_TIMESLICE_NS 10000000
93
94 bool cpu_is_stopped(CPUState *cpu)
95 {
96 return cpu->stopped || !runstate_is_running();
97 }
98
99 static bool cpu_thread_is_idle(CPUState *cpu)
100 {
101 if (cpu->stop || cpu->queued_work_first) {
102 return false;
103 }
104 if (cpu_is_stopped(cpu)) {
105 return true;
106 }
107 if (!cpu->halted || cpu_has_work(cpu) ||
108 kvm_halt_in_kernel()) {
109 return false;
110 }
111 return true;
112 }
113
114 static bool all_cpu_threads_idle(void)
115 {
116 CPUState *cpu;
117
118 CPU_FOREACH(cpu) {
119 if (!cpu_thread_is_idle(cpu)) {
120 return false;
121 }
122 }
123 return true;
124 }
125
126 /***********************************************************/
127 /* guest cycle counter */
128
129 /* Protected by TimersState seqlock */
130
/*
 * Value of the icount "sleep" option (default true), set in
 * configure_icount().  When false, qemu_start_warp_timer() advances the
 * virtual clock immediately instead of arming the warp timer.
 */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
/* Upper bound that icount_adjust() enforces on icount_time_shift. */
#define MAX_ICOUNT_SHIFT 10
134
/*
 * Bookkeeping for the guest tick counter, the VM clock and the
 * instruction counter (icount).
 */
typedef struct TimersState {
    /* Protected by BQL. */
    /* Last value handed out by cpu_get_ticks_locked(); keeps ticks monotonic. */
    int64_t cpu_ticks_prev;
    /* Offset added to the host tick counter to obtain guest ticks. */
    int64_t cpu_ticks_offset;

    /* Protect fields that can be respectively read outside the
     * BQL, and written from multiple threads.
     */
    QemuSeqLock vm_clock_seqlock;
    QemuSpin vm_clock_lock;

    /* Set to 1 by cpu_enable_ticks() and to 0 by cpu_disable_ticks(). */
    int16_t cpu_ticks_enabled;

    /* Conversion factor from emulated instructions to virtual clock ticks. */
    int16_t icount_time_shift;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;

    /* Clock value at which a warp started; -1 when no warp is pending. */
    int64_t vm_clock_warp_start;
    /* Offset added to get_clock() to obtain the VM clock. */
    int64_t cpu_clock_offset;

    /* Only written by TCG thread */
    int64_t qemu_icount;

    /* for adjusting icount */
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;
165
/* The TimersState instance used by the clock and icount helpers in this file. */
static TimersState timers_state;
/*
 * The round-robin kick-timer helpers assert that this is false.
 * NOTE(review): presumably selects multi-threaded TCG; it is set outside
 * the visible part of this file.
 */
bool mttcg_enabled;
168
169
170 /* The current number of executed instructions is based on what we
171 * originally budgeted minus the current state of the decrementing
172 * icount counters in extra/u16.low.
173 */
174 static int64_t cpu_get_icount_executed(CPUState *cpu)
175 {
176 return (cpu->icount_budget -
177 (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
178 }
179
180 /*
181 * Update the global shared timer_state.qemu_icount to take into
182 * account executed instructions. This is done by the TCG vCPU
183 * thread so the main-loop can see time has moved forward.
184 */
185 static void cpu_update_icount_locked(CPUState *cpu)
186 {
187 int64_t executed = cpu_get_icount_executed(cpu);
188 cpu->icount_budget -= executed;
189
190 atomic_set_i64(&timers_state.qemu_icount,
191 timers_state.qemu_icount + executed);
192 }
193
194 /*
195 * Update the global shared timer_state.qemu_icount to take into
196 * account executed instructions. This is done by the TCG vCPU
197 * thread so the main-loop can see time has moved forward.
198 */
199 void cpu_update_icount(CPUState *cpu)
200 {
201 seqlock_write_lock(&timers_state.vm_clock_seqlock,
202 &timers_state.vm_clock_lock);
203 cpu_update_icount_locked(cpu);
204 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
205 &timers_state.vm_clock_lock);
206 }
207
208 static int64_t cpu_get_icount_raw_locked(void)
209 {
210 CPUState *cpu = current_cpu;
211
212 if (cpu && cpu->running) {
213 if (!cpu->can_do_io) {
214 error_report("Bad icount read");
215 exit(1);
216 }
217 /* Take into account what has run */
218 cpu_update_icount_locked(cpu);
219 }
220 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
221 return atomic_read_i64(&timers_state.qemu_icount);
222 }
223
224 static int64_t cpu_get_icount_locked(void)
225 {
226 int64_t icount = cpu_get_icount_raw_locked();
227 return atomic_read_i64(&timers_state.qemu_icount_bias) +
228 cpu_icount_to_ns(icount);
229 }
230
231 int64_t cpu_get_icount_raw(void)
232 {
233 int64_t icount;
234 unsigned start;
235
236 do {
237 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
238 icount = cpu_get_icount_raw_locked();
239 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
240
241 return icount;
242 }
243
244 /* Return the virtual CPU time, based on the instruction counter. */
245 int64_t cpu_get_icount(void)
246 {
247 int64_t icount;
248 unsigned start;
249
250 do {
251 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
252 icount = cpu_get_icount_locked();
253 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
254
255 return icount;
256 }
257
258 int64_t cpu_icount_to_ns(int64_t icount)
259 {
260 return icount << atomic_read(&timers_state.icount_time_shift);
261 }
262
263 static int64_t cpu_get_ticks_locked(void)
264 {
265 int64_t ticks = timers_state.cpu_ticks_offset;
266 if (timers_state.cpu_ticks_enabled) {
267 ticks += cpu_get_host_ticks();
268 }
269
270 if (timers_state.cpu_ticks_prev > ticks) {
271 /* Non increasing ticks may happen if the host uses software suspend. */
272 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
273 ticks = timers_state.cpu_ticks_prev;
274 }
275
276 timers_state.cpu_ticks_prev = ticks;
277 return ticks;
278 }
279
280 /* return the time elapsed in VM between vm_start and vm_stop. Unless
281 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
282 * counter.
283 */
284 int64_t cpu_get_ticks(void)
285 {
286 int64_t ticks;
287
288 if (use_icount) {
289 return cpu_get_icount();
290 }
291
292 qemu_spin_lock(&timers_state.vm_clock_lock);
293 ticks = cpu_get_ticks_locked();
294 qemu_spin_unlock(&timers_state.vm_clock_lock);
295 return ticks;
296 }
297
298 static int64_t cpu_get_clock_locked(void)
299 {
300 int64_t time;
301
302 time = timers_state.cpu_clock_offset;
303 if (timers_state.cpu_ticks_enabled) {
304 time += get_clock();
305 }
306
307 return time;
308 }
309
310 /* Return the monotonic time elapsed in VM, i.e.,
311 * the time between vm_start and vm_stop
312 */
313 int64_t cpu_get_clock(void)
314 {
315 int64_t ti;
316 unsigned start;
317
318 do {
319 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
320 ti = cpu_get_clock_locked();
321 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
322
323 return ti;
324 }
325
326 /* enable cpu_get_ticks()
327 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
328 */
329 void cpu_enable_ticks(void)
330 {
331 seqlock_write_lock(&timers_state.vm_clock_seqlock,
332 &timers_state.vm_clock_lock);
333 if (!timers_state.cpu_ticks_enabled) {
334 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
335 timers_state.cpu_clock_offset -= get_clock();
336 timers_state.cpu_ticks_enabled = 1;
337 }
338 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
339 &timers_state.vm_clock_lock);
340 }
341
342 /* disable cpu_get_ticks() : the clock is stopped. You must not call
343 * cpu_get_ticks() after that.
344 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
345 */
346 void cpu_disable_ticks(void)
347 {
348 seqlock_write_lock(&timers_state.vm_clock_seqlock,
349 &timers_state.vm_clock_lock);
350 if (timers_state.cpu_ticks_enabled) {
351 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
352 timers_state.cpu_clock_offset = cpu_get_clock_locked();
353 timers_state.cpu_ticks_enabled = 0;
354 }
355 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
356 &timers_state.vm_clock_lock);
357 }
358
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.
   icount_adjust() uses this value (a tenth of NANOSECONDS_PER_SECOND)
   as the tolerance before it changes icount_time_shift. */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
364
365 static void icount_adjust(void)
366 {
367 int64_t cur_time;
368 int64_t cur_icount;
369 int64_t delta;
370
371 /* Protected by TimersState mutex. */
372 static int64_t last_delta;
373
374 /* If the VM is not running, then do nothing. */
375 if (!runstate_is_running()) {
376 return;
377 }
378
379 seqlock_write_lock(&timers_state.vm_clock_seqlock,
380 &timers_state.vm_clock_lock);
381 cur_time = cpu_get_clock_locked();
382 cur_icount = cpu_get_icount_locked();
383
384 delta = cur_icount - cur_time;
385 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
386 if (delta > 0
387 && last_delta + ICOUNT_WOBBLE < delta * 2
388 && timers_state.icount_time_shift > 0) {
389 /* The guest is getting too far ahead. Slow time down. */
390 atomic_set(&timers_state.icount_time_shift,
391 timers_state.icount_time_shift - 1);
392 }
393 if (delta < 0
394 && last_delta - ICOUNT_WOBBLE > delta * 2
395 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
396 /* The guest is getting too far behind. Speed time up. */
397 atomic_set(&timers_state.icount_time_shift,
398 timers_state.icount_time_shift + 1);
399 }
400 last_delta = delta;
401 atomic_set_i64(&timers_state.qemu_icount_bias,
402 cur_icount - (timers_state.qemu_icount
403 << timers_state.icount_time_shift));
404 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
405 &timers_state.vm_clock_lock);
406 }
407
408 static void icount_adjust_rt(void *opaque)
409 {
410 timer_mod(timers_state.icount_rt_timer,
411 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
412 icount_adjust();
413 }
414
415 static void icount_adjust_vm(void *opaque)
416 {
417 timer_mod(timers_state.icount_vm_timer,
418 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
419 NANOSECONDS_PER_SECOND / 10);
420 icount_adjust();
421 }
422
423 static int64_t qemu_icount_round(int64_t count)
424 {
425 int shift = atomic_read(&timers_state.icount_time_shift);
426 return (count + (1 << shift) - 1) >> shift;
427 }
428
/*
 * Account for a pending clock warp: if vm_clock_warp_start is set, add
 * the time elapsed since then to qemu_icount_bias (only while the VM is
 * running), clear the warp marker, and notify QEMU_CLOCK_VIRTUAL if it
 * now has expired timers.
 */
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    /* -1 means no warp has been started. */
    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        /* Re-read the start value now that the write lock is held. */
        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp_delta);
    }
    /* The warp is consumed whether or not the VM was running. */
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
474
/*
 * Callback of icount_warp_timer; simply accounts for the pending warp.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No checkpoint is needed here: the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
482
483 void qtest_clock_warp(int64_t dest)
484 {
485 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
486 AioContext *aio_context;
487 assert(qtest_enabled());
488 aio_context = qemu_get_aio_context();
489 while (clock < dest) {
490 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
491 QEMU_TIMER_ATTR_ALL);
492 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
493
494 seqlock_write_lock(&timers_state.vm_clock_seqlock,
495 &timers_state.vm_clock_lock);
496 atomic_set_i64(&timers_state.qemu_icount_bias,
497 timers_state.qemu_icount_bias + warp);
498 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
499 &timers_state.vm_clock_lock);
500
501 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
502 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
503 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
504 }
505 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
506 }
507
/*
 * Start a clock warp when icount is in use and the VM is running.
 * Depending on icount_sleep, the virtual clock is either advanced
 * straight to the next QEMU_CLOCK_VIRTUAL deadline, or the warp start
 * is recorded and icount_warp_timer is armed so that the advance
 * happens after the corresponding real time has passed.
 */
void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        /* Only warp when no vCPU thread has anything to do. */
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount. */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /* vCPU is sleeping and warp can't be started.
               It is probably a race condition: notification sent
               to vCPU was processed in advance and vCPU went to sleep.
               Therefore we have to wake it up for doing something. */
            if (replay_has_checkpoint()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        /* Warn only once per process about the no-sleep/no-timer case. */
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            atomic_set_i64(&timers_state.qemu_icount_bias,
                           timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            /* Keep the earliest warp start seen so far. */
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        /* A timer is already due: just notify the clock. */
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
609
610 static void qemu_account_warp_timer(void)
611 {
612 if (!use_icount || !icount_sleep) {
613 return;
614 }
615
616 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
617 * do not fire, so computing the deadline does not make sense.
618 */
619 if (!runstate_is_running()) {
620 return;
621 }
622
623 /* warp clock deterministically in record/replay mode */
624 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
625 return;
626 }
627
628 timer_del(timers_state.icount_warp_timer);
629 icount_warp_rt();
630 }
631
632 static bool icount_state_needed(void *opaque)
633 {
634 return use_icount;
635 }
636
637 static bool warp_timer_state_needed(void *opaque)
638 {
639 TimersState *s = opaque;
640 return s->icount_warp_timer != NULL;
641 }
642
643 static bool adjust_timers_state_needed(void *opaque)
644 {
645 TimersState *s = opaque;
646 return s->icount_rt_timer != NULL;
647 }
648
/*
 * Subsection for warp timer migration.  It is optional because the warp
 * timer may not have been created (see warp_timer_state_needed()).
 * Carries the warp start time and the warp timer itself.
 */
static const VMStateDescription icount_vmstate_warp_timer = {
    .name = "timer/icount/warp_timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = warp_timer_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(vm_clock_warp_start, TimersState),
        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
663
/*
 * Optional subsection carrying the two icount adjustment timers; only
 * present when icount_rt_timer exists (see adjust_timers_state_needed()).
 */
static const VMStateDescription icount_vmstate_adjust_timers = {
    .name = "timer/icount/timers",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = adjust_timers_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
675
/*
 * This is a subsection for icount migration.  It is present only when
 * icount is in use (see icount_state_needed()) and carries the icount
 * bias and the instruction count, plus the optional timer subsections.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_warp_timer,
        &icount_vmstate_adjust_timers,
        NULL
    }
};
695
/*
 * Top-level migration description for TimersState, registered in
 * cpu_ticks_init().  cpu_clock_offset is only present from version 2 on;
 * the 8 unused bytes keep the stream layout of a field that is no longer
 * stored.
 */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_UNUSED(8),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
711
712 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
713 {
714 double pct;
715 double throttle_ratio;
716 int64_t sleeptime_ns, endtime_ns;
717
718 if (!cpu_throttle_get_percentage()) {
719 return;
720 }
721
722 pct = (double)cpu_throttle_get_percentage()/100;
723 throttle_ratio = pct / (1 - pct);
724 /* Add 1ns to fix double's rounding error (like 0.9999999...) */
725 sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
726 endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
727 while (sleeptime_ns > 0 && !cpu->stop) {
728 if (sleeptime_ns > SCALE_MS) {
729 qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
730 sleeptime_ns / SCALE_MS);
731 } else {
732 qemu_mutex_unlock_iothread();
733 g_usleep(sleeptime_ns / SCALE_US);
734 qemu_mutex_lock_iothread();
735 }
736 sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
737 }
738 atomic_set(&cpu->throttle_thread_scheduled, 0);
739 }
740
741 static void cpu_throttle_timer_tick(void *opaque)
742 {
743 CPUState *cpu;
744 double pct;
745
746 /* Stop the timer if needed */
747 if (!cpu_throttle_get_percentage()) {
748 return;
749 }
750 CPU_FOREACH(cpu) {
751 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
752 async_run_on_cpu(cpu, cpu_throttle_thread,
753 RUN_ON_CPU_NULL);
754 }
755 }
756
757 pct = (double)cpu_throttle_get_percentage()/100;
758 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
759 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
760 }
761
762 void cpu_throttle_set(int new_throttle_pct)
763 {
764 /* Ensure throttle percentage is within valid range */
765 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
766 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
767
768 atomic_set(&throttle_percentage, new_throttle_pct);
769
770 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
771 CPU_THROTTLE_TIMESLICE_NS);
772 }
773
774 void cpu_throttle_stop(void)
775 {
776 atomic_set(&throttle_percentage, 0);
777 }
778
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
783
784 int cpu_throttle_get_percentage(void)
785 {
786 return atomic_read(&throttle_percentage);
787 }
788
789 void cpu_ticks_init(void)
790 {
791 seqlock_init(&timers_state.vm_clock_seqlock);
792 qemu_spin_init(&timers_state.vm_clock_lock);
793 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
794 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
795 cpu_throttle_timer_tick, NULL);
796 }
797
798 void configure_icount(QemuOpts *opts, Error **errp)
799 {
800 const char *option;
801 char *rem_str = NULL;
802
803 option = qemu_opt_get(opts, "shift");
804 if (!option) {
805 if (qemu_opt_get(opts, "align") != NULL) {
806 error_setg(errp, "Please specify shift option when using align");
807 }
808 return;
809 }
810
811 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
812 if (icount_sleep) {
813 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
814 icount_timer_cb, NULL);
815 }
816
817 icount_align_option = qemu_opt_get_bool(opts, "align", false);
818
819 if (icount_align_option && !icount_sleep) {
820 error_setg(errp, "align=on and sleep=off are incompatible");
821 }
822 if (strcmp(option, "auto") != 0) {
823 errno = 0;
824 timers_state.icount_time_shift = strtol(option, &rem_str, 0);
825 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
826 error_setg(errp, "icount: Invalid shift value");
827 }
828 use_icount = 1;
829 return;
830 } else if (icount_align_option) {
831 error_setg(errp, "shift=auto and align=on are incompatible");
832 } else if (!icount_sleep) {
833 error_setg(errp, "shift=auto and sleep=off are incompatible");
834 }
835
836 use_icount = 2;
837
838 /* 125MIPS seems a reasonable initial guess at the guest speed.
839 It will be corrected fairly quickly anyway. */
840 timers_state.icount_time_shift = 3;
841
842 /* Have both realtime and virtual time triggers for speed adjustment.
843 The realtime trigger catches emulated time passing too slowly,
844 the virtual time trigger catches emulated time passing too fast.
845 Realtime triggers occur even when idle, so use them less frequently
846 than VM triggers. */
847 timers_state.vm_clock_warp_start = -1;
848 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
849 icount_adjust_rt, NULL);
850 timer_mod(timers_state.icount_rt_timer,
851 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
852 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
853 icount_adjust_vm, NULL);
854 timer_mod(timers_state.icount_vm_timer,
855 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
856 NANOSECONDS_PER_SECOND / 10);
857 }
858
/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running, a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */

/* Created lazily by start_tcg_kick_timer(); NULL until then. */
static QEMUTimer *tcg_kick_vcpu_timer;
/* vCPU that qemu_cpu_kick_rr_next_cpu() kicks; read with atomic_mb_read(). */
static CPUState *tcg_current_rr_cpu;

/* Interval between kicks: a tenth of NANOSECONDS_PER_SECOND. */
#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
875
876 static inline int64_t qemu_tcg_next_kick(void)
877 {
878 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
879 }
880
881 /* Kick the currently round-robin scheduled vCPU to next */
882 static void qemu_cpu_kick_rr_next_cpu(void)
883 {
884 CPUState *cpu;
885 do {
886 cpu = atomic_mb_read(&tcg_current_rr_cpu);
887 if (cpu) {
888 cpu_exit(cpu);
889 }
890 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
891 }
892
893 /* Kick all RR vCPUs */
894 static void qemu_cpu_kick_rr_cpus(void)
895 {
896 CPUState *cpu;
897
898 CPU_FOREACH(cpu) {
899 cpu_exit(cpu);
900 };
901 }
902
903 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
904 {
905 }
906
907 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
908 {
909 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
910 qemu_notify_event();
911 return;
912 }
913
914 if (qemu_in_vcpu_thread()) {
915 /* A CPU is currently running; kick it back out to the
916 * tcg_cpu_exec() loop so it will recalculate its
917 * icount deadline immediately.
918 */
919 qemu_cpu_kick(current_cpu);
920 } else if (first_cpu) {
921 /* qemu_cpu_kick is not enough to kick a halted CPU out of
922 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
923 * causes cpu_thread_is_idle to return false. This way,
924 * handle_icount_deadline can run.
925 * If we have no CPUs at all for some reason, we don't
926 * need to do anything.
927 */
928 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
929 }
930 }
931
932 static void kick_tcg_thread(void *opaque)
933 {
934 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
935 qemu_cpu_kick_rr_next_cpu();
936 }
937
938 static void start_tcg_kick_timer(void)
939 {
940 assert(!mttcg_enabled);
941 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
942 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
943 kick_tcg_thread, NULL);
944 }
945 if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
946 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
947 }
948 }
949
950 static void stop_tcg_kick_timer(void)
951 {
952 assert(!mttcg_enabled);
953 if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
954 timer_del(tcg_kick_vcpu_timer);
955 }
956 }
957
958 /***********************************************************/
959 void hw_error(const char *fmt, ...)
960 {
961 va_list ap;
962 CPUState *cpu;
963
964 va_start(ap, fmt);
965 fprintf(stderr, "qemu: hardware error: ");
966 vfprintf(stderr, fmt, ap);
967 fprintf(stderr, "\n");
968 CPU_FOREACH(cpu) {
969 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
970 cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
971 }
972 va_end(ap);
973 abort();
974 }
975
976 void cpu_synchronize_all_states(void)
977 {
978 CPUState *cpu;
979
980 CPU_FOREACH(cpu) {
981 cpu_synchronize_state(cpu);
982 /* TODO: move to cpu_synchronize_state() */
983 if (hvf_enabled()) {
984 hvf_cpu_synchronize_state(cpu);
985 }
986 }
987 }
988
989 void cpu_synchronize_all_post_reset(void)
990 {
991 CPUState *cpu;
992
993 CPU_FOREACH(cpu) {
994 cpu_synchronize_post_reset(cpu);
995 /* TODO: move to cpu_synchronize_post_reset() */
996 if (hvf_enabled()) {
997 hvf_cpu_synchronize_post_reset(cpu);
998 }
999 }
1000 }
1001
1002 void cpu_synchronize_all_post_init(void)
1003 {
1004 CPUState *cpu;
1005
1006 CPU_FOREACH(cpu) {
1007 cpu_synchronize_post_init(cpu);
1008 /* TODO: move to cpu_synchronize_post_init() */
1009 if (hvf_enabled()) {
1010 hvf_cpu_synchronize_post_init(cpu);
1011 }
1012 }
1013 }
1014
1015 void cpu_synchronize_all_pre_loadvm(void)
1016 {
1017 CPUState *cpu;
1018
1019 CPU_FOREACH(cpu) {
1020 cpu_synchronize_pre_loadvm(cpu);
1021 }
1022 }
1023
1024 static int do_vm_stop(RunState state, bool send_stop)
1025 {
1026 int ret = 0;
1027
1028 if (runstate_is_running()) {
1029 runstate_set(state);
1030 cpu_disable_ticks();
1031 pause_all_vcpus();
1032 vm_state_notify(0, state);
1033 if (send_stop) {
1034 qapi_event_send_stop();
1035 }
1036 }
1037
1038 bdrv_drain_all();
1039 ret = bdrv_flush_all();
1040
1041 return ret;
1042 }
1043
1044 /* Special vm_stop() variant for terminating the process. Historically clients
1045 * did not expect a QMP STOP event and so we need to retain compatibility.
1046 */
1047 int vm_shutdown(void)
1048 {
1049 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1050 }
1051
1052 static bool cpu_can_run(CPUState *cpu)
1053 {
1054 if (cpu->stop) {
1055 return false;
1056 }
1057 if (cpu_is_stopped(cpu)) {
1058 return false;
1059 }
1060 return true;
1061 }
1062
1063 static void cpu_handle_guest_debug(CPUState *cpu)
1064 {
1065 gdb_set_stop_cpu(cpu);
1066 qemu_system_debug_request();
1067 cpu->stopped = true;
1068 }
1069
1070 #ifdef CONFIG_LINUX
1071 static void sigbus_reraise(void)
1072 {
1073 sigset_t set;
1074 struct sigaction action;
1075
1076 memset(&action, 0, sizeof(action));
1077 action.sa_handler = SIG_DFL;
1078 if (!sigaction(SIGBUS, &action, NULL)) {
1079 raise(SIGBUS);
1080 sigemptyset(&set);
1081 sigaddset(&set, SIGBUS);
1082 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1083 }
1084 perror("Failed to re-raise SIGBUS!\n");
1085 abort();
1086 }
1087
1088 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1089 {
1090 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1091 sigbus_reraise();
1092 }
1093
1094 if (current_cpu) {
1095 /* Called asynchronously in VCPU thread. */
1096 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1097 sigbus_reraise();
1098 }
1099 } else {
1100 /* Called synchronously (via signalfd) in main thread. */
1101 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1102 sigbus_reraise();
1103 }
1104 }
1105 }
1106
1107 static void qemu_init_sigbus(void)
1108 {
1109 struct sigaction action;
1110
1111 memset(&action, 0, sizeof(action));
1112 action.sa_flags = SA_SIGINFO;
1113 action.sa_sigaction = sigbus_handler;
1114 sigaction(SIGBUS, &action, NULL);
1115
1116 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1117 }
1118 #else /* !CONFIG_LINUX */
/* Without CONFIG_LINUX no SIGBUS handling is set up. */
static void qemu_init_sigbus(void)
{
    /* Nothing to do. */
}
1122 #endif /* !CONFIG_LINUX */
1123
1124 static QemuThread io_thread;
1125
1126 /* cpu creation */
1127 static QemuCond qemu_cpu_cond;
1128 /* system init */
1129 static QemuCond qemu_pause_cond;
1130
1131 void qemu_init_cpu_loop(void)
1132 {
1133 qemu_init_sigbus();
1134 qemu_cond_init(&qemu_cpu_cond);
1135 qemu_cond_init(&qemu_pause_cond);
1136 qemu_mutex_init(&qemu_global_mutex);
1137
1138 qemu_thread_get_self(&io_thread);
1139 }
1140
1141 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1142 {
1143 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1144 }
1145
1146 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1147 {
1148 if (kvm_destroy_vcpu(cpu) < 0) {
1149 error_report("kvm_destroy_vcpu failed");
1150 exit(EXIT_FAILURE);
1151 }
1152 }
1153
1154 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1155 {
1156 }
1157
1158 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1159 {
1160 g_assert(qemu_cpu_is_self(cpu));
1161 cpu->stop = false;
1162 cpu->stopped = true;
1163 if (exit) {
1164 cpu_exit(cpu);
1165 }
1166 qemu_cond_broadcast(&qemu_pause_cond);
1167 }
1168
1169 static void qemu_wait_io_event_common(CPUState *cpu)
1170 {
1171 atomic_mb_set(&cpu->thread_kicked, false);
1172 if (cpu->stop) {
1173 qemu_cpu_stop(cpu, false);
1174 }
1175 process_queued_cpu_work(cpu);
1176 }
1177
1178 static void qemu_tcg_rr_wait_io_event(void)
1179 {
1180 CPUState *cpu;
1181
1182 while (all_cpu_threads_idle()) {
1183 stop_tcg_kick_timer();
1184 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1185 }
1186
1187 start_tcg_kick_timer();
1188
1189 CPU_FOREACH(cpu) {
1190 qemu_wait_io_event_common(cpu);
1191 }
1192 }
1193
1194 static void qemu_wait_io_event(CPUState *cpu)
1195 {
1196 bool slept = false;
1197
1198 while (cpu_thread_is_idle(cpu)) {
1199 if (!slept) {
1200 slept = true;
1201 qemu_plugin_vcpu_idle_cb(cpu);
1202 }
1203 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1204 }
1205 if (slept) {
1206 qemu_plugin_vcpu_resume_cb(cpu);
1207 }
1208
1209 #ifdef _WIN32
1210 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1211 if (!tcg_enabled()) {
1212 SleepEx(0, TRUE);
1213 }
1214 #endif
1215 qemu_wait_io_event_common(cpu);
1216 }
1217
1218 static void *qemu_kvm_cpu_thread_fn(void *arg)
1219 {
1220 CPUState *cpu = arg;
1221 int r;
1222
1223 rcu_register_thread();
1224
1225 qemu_mutex_lock_iothread();
1226 qemu_thread_get_self(cpu->thread);
1227 cpu->thread_id = qemu_get_thread_id();
1228 cpu->can_do_io = 1;
1229 current_cpu = cpu;
1230
1231 r = kvm_init_vcpu(cpu);
1232 if (r < 0) {
1233 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1234 exit(1);
1235 }
1236
1237 kvm_init_cpu_signals(cpu);
1238
1239 /* signal CPU creation */
1240 cpu->created = true;
1241 qemu_cond_signal(&qemu_cpu_cond);
1242 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1243
1244 do {
1245 if (cpu_can_run(cpu)) {
1246 r = kvm_cpu_exec(cpu);
1247 if (r == EXCP_DEBUG) {
1248 cpu_handle_guest_debug(cpu);
1249 }
1250 }
1251 qemu_wait_io_event(cpu);
1252 } while (!cpu->unplug || cpu_can_run(cpu));
1253
1254 qemu_kvm_destroy_vcpu(cpu);
1255 cpu->created = false;
1256 qemu_cond_signal(&qemu_cpu_cond);
1257 qemu_mutex_unlock_iothread();
1258 rcu_unregister_thread();
1259 return NULL;
1260 }
1261
1262 static void *qemu_dummy_cpu_thread_fn(void *arg)
1263 {
1264 #ifdef _WIN32
1265 error_report("qtest is not supported under Windows");
1266 exit(1);
1267 #else
1268 CPUState *cpu = arg;
1269 sigset_t waitset;
1270 int r;
1271
1272 rcu_register_thread();
1273
1274 qemu_mutex_lock_iothread();
1275 qemu_thread_get_self(cpu->thread);
1276 cpu->thread_id = qemu_get_thread_id();
1277 cpu->can_do_io = 1;
1278 current_cpu = cpu;
1279
1280 sigemptyset(&waitset);
1281 sigaddset(&waitset, SIG_IPI);
1282
1283 /* signal CPU creation */
1284 cpu->created = true;
1285 qemu_cond_signal(&qemu_cpu_cond);
1286 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1287
1288 do {
1289 qemu_mutex_unlock_iothread();
1290 do {
1291 int sig;
1292 r = sigwait(&waitset, &sig);
1293 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1294 if (r == -1) {
1295 perror("sigwait");
1296 exit(1);
1297 }
1298 qemu_mutex_lock_iothread();
1299 qemu_wait_io_event(cpu);
1300 } while (!cpu->unplug);
1301
1302 qemu_mutex_unlock_iothread();
1303 rcu_unregister_thread();
1304 return NULL;
1305 #endif
1306 }
1307
1308 static int64_t tcg_get_icount_limit(void)
1309 {
1310 int64_t deadline;
1311
1312 if (replay_mode != REPLAY_MODE_PLAY) {
1313 /*
1314 * Include all the timers, because they may need an attention.
1315 * Too long CPU execution may create unnecessary delay in UI.
1316 */
1317 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1318 QEMU_TIMER_ATTR_ALL);
1319 /* Check realtime timers, because they help with input processing */
1320 deadline = qemu_soonest_timeout(deadline,
1321 qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
1322 QEMU_TIMER_ATTR_ALL));
1323
1324 /* Maintain prior (possibly buggy) behaviour where if no deadline
1325 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1326 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1327 * nanoseconds.
1328 */
1329 if ((deadline < 0) || (deadline > INT32_MAX)) {
1330 deadline = INT32_MAX;
1331 }
1332
1333 return qemu_icount_round(deadline);
1334 } else {
1335 return replay_get_instructions();
1336 }
1337 }
1338
1339 static void handle_icount_deadline(void)
1340 {
1341 assert(qemu_in_vcpu_thread());
1342 if (use_icount) {
1343 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1344 QEMU_TIMER_ATTR_ALL);
1345
1346 if (deadline == 0) {
1347 /* Wake up other AioContexts. */
1348 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1349 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1350 }
1351 }
1352 }
1353
1354 static void prepare_icount_for_run(CPUState *cpu)
1355 {
1356 if (use_icount) {
1357 int insns_left;
1358
1359 /* These should always be cleared by process_icount_data after
1360 * each vCPU execution. However u16.high can be raised
1361 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1362 */
1363 g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
1364 g_assert(cpu->icount_extra == 0);
1365
1366 cpu->icount_budget = tcg_get_icount_limit();
1367 insns_left = MIN(0xffff, cpu->icount_budget);
1368 cpu_neg(cpu)->icount_decr.u16.low = insns_left;
1369 cpu->icount_extra = cpu->icount_budget - insns_left;
1370
1371 replay_mutex_lock();
1372 }
1373 }
1374
1375 static void process_icount_data(CPUState *cpu)
1376 {
1377 if (use_icount) {
1378 /* Account for executed instructions */
1379 cpu_update_icount(cpu);
1380
1381 /* Reset the counters */
1382 cpu_neg(cpu)->icount_decr.u16.low = 0;
1383 cpu->icount_extra = 0;
1384 cpu->icount_budget = 0;
1385
1386 replay_account_executed_instructions();
1387
1388 replay_mutex_unlock();
1389 }
1390 }
1391
1392
1393 static int tcg_cpu_exec(CPUState *cpu)
1394 {
1395 int ret;
1396 #ifdef CONFIG_PROFILER
1397 int64_t ti;
1398 #endif
1399
1400 assert(tcg_enabled());
1401 #ifdef CONFIG_PROFILER
1402 ti = profile_getclock();
1403 #endif
1404 cpu_exec_start(cpu);
1405 ret = cpu_exec(cpu);
1406 cpu_exec_end(cpu);
1407 #ifdef CONFIG_PROFILER
1408 atomic_set(&tcg_ctx->prof.cpu_exec_time,
1409 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
1410 #endif
1411 return ret;
1412 }
1413
1414 /* Destroy any remaining vCPUs which have been unplugged and have
1415 * finished running
1416 */
1417 static void deal_with_unplugged_cpus(void)
1418 {
1419 CPUState *cpu;
1420
1421 CPU_FOREACH(cpu) {
1422 if (cpu->unplug && !cpu_can_run(cpu)) {
1423 qemu_tcg_destroy_vcpu(cpu);
1424 cpu->created = false;
1425 qemu_cond_signal(&qemu_cpu_cond);
1426 break;
1427 }
1428 }
1429 }
1430
1431 /* Single-threaded TCG
1432 *
1433 * In the single-threaded case each vCPU is simulated in turn. If
1434 * there is more than a single vCPU we create a simple timer to kick
1435 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1436 * This is done explicitly rather than relying on side-effects
1437 * elsewhere.
1438 */
1439
1440 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1441 {
1442 CPUState *cpu = arg;
1443
1444 assert(tcg_enabled());
1445 rcu_register_thread();
1446 tcg_register_thread();
1447
1448 qemu_mutex_lock_iothread();
1449 qemu_thread_get_self(cpu->thread);
1450
1451 cpu->thread_id = qemu_get_thread_id();
1452 cpu->created = true;
1453 cpu->can_do_io = 1;
1454 qemu_cond_signal(&qemu_cpu_cond);
1455 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1456
1457 /* wait for initial kick-off after machine start */
1458 while (first_cpu->stopped) {
1459 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1460
1461 /* process any pending work */
1462 CPU_FOREACH(cpu) {
1463 current_cpu = cpu;
1464 qemu_wait_io_event_common(cpu);
1465 }
1466 }
1467
1468 start_tcg_kick_timer();
1469
1470 cpu = first_cpu;
1471
1472 /* process any pending work */
1473 cpu->exit_request = 1;
1474
1475 while (1) {
1476 qemu_mutex_unlock_iothread();
1477 replay_mutex_lock();
1478 qemu_mutex_lock_iothread();
1479 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1480 qemu_account_warp_timer();
1481
1482 /* Run the timers here. This is much more efficient than
1483 * waking up the I/O thread and waiting for completion.
1484 */
1485 handle_icount_deadline();
1486
1487 replay_mutex_unlock();
1488
1489 if (!cpu) {
1490 cpu = first_cpu;
1491 }
1492
1493 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1494
1495 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1496 current_cpu = cpu;
1497
1498 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1499 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1500
1501 if (cpu_can_run(cpu)) {
1502 int r;
1503
1504 qemu_mutex_unlock_iothread();
1505 prepare_icount_for_run(cpu);
1506
1507 r = tcg_cpu_exec(cpu);
1508
1509 process_icount_data(cpu);
1510 qemu_mutex_lock_iothread();
1511
1512 if (r == EXCP_DEBUG) {
1513 cpu_handle_guest_debug(cpu);
1514 break;
1515 } else if (r == EXCP_ATOMIC) {
1516 qemu_mutex_unlock_iothread();
1517 cpu_exec_step_atomic(cpu);
1518 qemu_mutex_lock_iothread();
1519 break;
1520 }
1521 } else if (cpu->stop) {
1522 if (cpu->unplug) {
1523 cpu = CPU_NEXT(cpu);
1524 }
1525 break;
1526 }
1527
1528 cpu = CPU_NEXT(cpu);
1529 } /* while (cpu && !cpu->exit_request).. */
1530
1531 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1532 atomic_set(&tcg_current_rr_cpu, NULL);
1533
1534 if (cpu && cpu->exit_request) {
1535 atomic_mb_set(&cpu->exit_request, 0);
1536 }
1537
1538 if (use_icount && all_cpu_threads_idle()) {
1539 /*
1540 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1541 * in the main_loop, wake it up in order to start the warp timer.
1542 */
1543 qemu_notify_event();
1544 }
1545
1546 qemu_tcg_rr_wait_io_event();
1547 deal_with_unplugged_cpus();
1548 }
1549
1550 rcu_unregister_thread();
1551 return NULL;
1552 }
1553
1554 static void *qemu_hax_cpu_thread_fn(void *arg)
1555 {
1556 CPUState *cpu = arg;
1557 int r;
1558
1559 rcu_register_thread();
1560 qemu_mutex_lock_iothread();
1561 qemu_thread_get_self(cpu->thread);
1562
1563 cpu->thread_id = qemu_get_thread_id();
1564 cpu->created = true;
1565 current_cpu = cpu;
1566
1567 hax_init_vcpu(cpu);
1568 qemu_cond_signal(&qemu_cpu_cond);
1569 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1570
1571 do {
1572 if (cpu_can_run(cpu)) {
1573 r = hax_smp_cpu_exec(cpu);
1574 if (r == EXCP_DEBUG) {
1575 cpu_handle_guest_debug(cpu);
1576 }
1577 }
1578
1579 qemu_wait_io_event(cpu);
1580 } while (!cpu->unplug || cpu_can_run(cpu));
1581 rcu_unregister_thread();
1582 return NULL;
1583 }
1584
1585 /* The HVF-specific vCPU thread function. This one should only run when the host
1586 * CPU supports the VMX "unrestricted guest" feature. */
1587 static void *qemu_hvf_cpu_thread_fn(void *arg)
1588 {
1589 CPUState *cpu = arg;
1590
1591 int r;
1592
1593 assert(hvf_enabled());
1594
1595 rcu_register_thread();
1596
1597 qemu_mutex_lock_iothread();
1598 qemu_thread_get_self(cpu->thread);
1599
1600 cpu->thread_id = qemu_get_thread_id();
1601 cpu->can_do_io = 1;
1602 current_cpu = cpu;
1603
1604 hvf_init_vcpu(cpu);
1605
1606 /* signal CPU creation */
1607 cpu->created = true;
1608 qemu_cond_signal(&qemu_cpu_cond);
1609 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1610
1611 do {
1612 if (cpu_can_run(cpu)) {
1613 r = hvf_vcpu_exec(cpu);
1614 if (r == EXCP_DEBUG) {
1615 cpu_handle_guest_debug(cpu);
1616 }
1617 }
1618 qemu_wait_io_event(cpu);
1619 } while (!cpu->unplug || cpu_can_run(cpu));
1620
1621 hvf_vcpu_destroy(cpu);
1622 cpu->created = false;
1623 qemu_cond_signal(&qemu_cpu_cond);
1624 qemu_mutex_unlock_iothread();
1625 rcu_unregister_thread();
1626 return NULL;
1627 }
1628
1629 static void *qemu_whpx_cpu_thread_fn(void *arg)
1630 {
1631 CPUState *cpu = arg;
1632 int r;
1633
1634 rcu_register_thread();
1635
1636 qemu_mutex_lock_iothread();
1637 qemu_thread_get_self(cpu->thread);
1638 cpu->thread_id = qemu_get_thread_id();
1639 current_cpu = cpu;
1640
1641 r = whpx_init_vcpu(cpu);
1642 if (r < 0) {
1643 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1644 exit(1);
1645 }
1646
1647 /* signal CPU creation */
1648 cpu->created = true;
1649 qemu_cond_signal(&qemu_cpu_cond);
1650 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1651
1652 do {
1653 if (cpu_can_run(cpu)) {
1654 r = whpx_vcpu_exec(cpu);
1655 if (r == EXCP_DEBUG) {
1656 cpu_handle_guest_debug(cpu);
1657 }
1658 }
1659 while (cpu_thread_is_idle(cpu)) {
1660 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1661 }
1662 qemu_wait_io_event_common(cpu);
1663 } while (!cpu->unplug || cpu_can_run(cpu));
1664
1665 whpx_destroy_vcpu(cpu);
1666 cpu->created = false;
1667 qemu_cond_signal(&qemu_cpu_cond);
1668 qemu_mutex_unlock_iothread();
1669 rcu_unregister_thread();
1670 return NULL;
1671 }
1672
#ifdef _WIN32
/* Empty APC routine queued by qemu_cpu_kick_thread() via QueueUserAPC(). */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
    /* Intentionally empty. */
}
#endif
1678
1679 /* Multi-threaded TCG
1680 *
1681 * In the multi-threaded case each vCPU has its own thread. The TLS
1682 * variable current_cpu can be used deep in the code to find the
1683 * current CPUState for a given thread.
1684 */
1685
1686 static void *qemu_tcg_cpu_thread_fn(void *arg)
1687 {
1688 CPUState *cpu = arg;
1689
1690 assert(tcg_enabled());
1691 g_assert(!use_icount);
1692
1693 rcu_register_thread();
1694 tcg_register_thread();
1695
1696 qemu_mutex_lock_iothread();
1697 qemu_thread_get_self(cpu->thread);
1698
1699 cpu->thread_id = qemu_get_thread_id();
1700 cpu->created = true;
1701 cpu->can_do_io = 1;
1702 current_cpu = cpu;
1703 qemu_cond_signal(&qemu_cpu_cond);
1704 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1705
1706 /* process any pending work */
1707 cpu->exit_request = 1;
1708
1709 do {
1710 if (cpu_can_run(cpu)) {
1711 int r;
1712 qemu_mutex_unlock_iothread();
1713 r = tcg_cpu_exec(cpu);
1714 qemu_mutex_lock_iothread();
1715 switch (r) {
1716 case EXCP_DEBUG:
1717 cpu_handle_guest_debug(cpu);
1718 break;
1719 case EXCP_HALTED:
1720 /* during start-up the vCPU is reset and the thread is
1721 * kicked several times. If we don't ensure we go back
1722 * to sleep in the halted state we won't cleanly
1723 * start-up when the vCPU is enabled.
1724 *
1725 * cpu->halted should ensure we sleep in wait_io_event
1726 */
1727 g_assert(cpu->halted);
1728 break;
1729 case EXCP_ATOMIC:
1730 qemu_mutex_unlock_iothread();
1731 cpu_exec_step_atomic(cpu);
1732 qemu_mutex_lock_iothread();
1733 default:
1734 /* Ignore everything else? */
1735 break;
1736 }
1737 }
1738
1739 atomic_mb_set(&cpu->exit_request, 0);
1740 qemu_wait_io_event(cpu);
1741 } while (!cpu->unplug || cpu_can_run(cpu));
1742
1743 qemu_tcg_destroy_vcpu(cpu);
1744 cpu->created = false;
1745 qemu_cond_signal(&qemu_cpu_cond);
1746 qemu_mutex_unlock_iothread();
1747 rcu_unregister_thread();
1748 return NULL;
1749 }
1750
1751 static void qemu_cpu_kick_thread(CPUState *cpu)
1752 {
1753 #ifndef _WIN32
1754 int err;
1755
1756 if (cpu->thread_kicked) {
1757 return;
1758 }
1759 cpu->thread_kicked = true;
1760 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1761 if (err && err != ESRCH) {
1762 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1763 exit(1);
1764 }
1765 #else /* _WIN32 */
1766 if (!qemu_cpu_is_self(cpu)) {
1767 if (whpx_enabled()) {
1768 whpx_vcpu_kick(cpu);
1769 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1770 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1771 __func__, GetLastError());
1772 exit(1);
1773 }
1774 }
1775 #endif
1776 }
1777
1778 void qemu_cpu_kick(CPUState *cpu)
1779 {
1780 qemu_cond_broadcast(cpu->halt_cond);
1781 if (tcg_enabled()) {
1782 if (qemu_tcg_mttcg_enabled()) {
1783 cpu_exit(cpu);
1784 } else {
1785 qemu_cpu_kick_rr_cpus();
1786 }
1787 } else {
1788 if (hax_enabled()) {
1789 /*
1790 * FIXME: race condition with the exit_request check in
1791 * hax_vcpu_hax_exec
1792 */
1793 cpu->exit_request = 1;
1794 }
1795 qemu_cpu_kick_thread(cpu);
1796 }
1797 }
1798
1799 void qemu_cpu_kick_self(void)
1800 {
1801 assert(current_cpu);
1802 qemu_cpu_kick_thread(current_cpu);
1803 }
1804
1805 bool qemu_cpu_is_self(CPUState *cpu)
1806 {
1807 return qemu_thread_is_self(cpu->thread);
1808 }
1809
1810 bool qemu_in_vcpu_thread(void)
1811 {
1812 return current_cpu && qemu_cpu_is_self(current_cpu);
1813 }
1814
/* Per-thread flag: true while this thread holds the iothread lock. */
static __thread bool iothread_locked;

/* Report whether the calling thread currently holds the iothread lock. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1821
1822 /*
1823 * The BQL is taken from so many places that it is worth profiling the
1824 * callers directly, instead of funneling them all through a single function.
1825 */
1826 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1827 {
1828 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1829
1830 g_assert(!qemu_mutex_iothread_locked());
1831 bql_lock(&qemu_global_mutex, file, line);
1832 iothread_locked = true;
1833 }
1834
1835 void qemu_mutex_unlock_iothread(void)
1836 {
1837 g_assert(qemu_mutex_iothread_locked());
1838 iothread_locked = false;
1839 qemu_mutex_unlock(&qemu_global_mutex);
1840 }
1841
1842 void qemu_cond_wait_iothread(QemuCond *cond)
1843 {
1844 qemu_cond_wait(cond, &qemu_global_mutex);
1845 }
1846
1847 static bool all_vcpus_paused(void)
1848 {
1849 CPUState *cpu;
1850
1851 CPU_FOREACH(cpu) {
1852 if (!cpu->stopped) {
1853 return false;
1854 }
1855 }
1856
1857 return true;
1858 }
1859
1860 void pause_all_vcpus(void)
1861 {
1862 CPUState *cpu;
1863
1864 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1865 CPU_FOREACH(cpu) {
1866 if (qemu_cpu_is_self(cpu)) {
1867 qemu_cpu_stop(cpu, true);
1868 } else {
1869 cpu->stop = true;
1870 qemu_cpu_kick(cpu);
1871 }
1872 }
1873
1874 /* We need to drop the replay_lock so any vCPU threads woken up
1875 * can finish their replay tasks
1876 */
1877 replay_mutex_unlock();
1878
1879 while (!all_vcpus_paused()) {
1880 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1881 CPU_FOREACH(cpu) {
1882 qemu_cpu_kick(cpu);
1883 }
1884 }
1885
1886 qemu_mutex_unlock_iothread();
1887 replay_mutex_lock();
1888 qemu_mutex_lock_iothread();
1889 }
1890
1891 void cpu_resume(CPUState *cpu)
1892 {
1893 cpu->stop = false;
1894 cpu->stopped = false;
1895 qemu_cpu_kick(cpu);
1896 }
1897
1898 void resume_all_vcpus(void)
1899 {
1900 CPUState *cpu;
1901
1902 if (!runstate_is_running()) {
1903 return;
1904 }
1905
1906 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1907 CPU_FOREACH(cpu) {
1908 cpu_resume(cpu);
1909 }
1910 }
1911
1912 void cpu_remove_sync(CPUState *cpu)
1913 {
1914 cpu->stop = true;
1915 cpu->unplug = true;
1916 qemu_cpu_kick(cpu);
1917 qemu_mutex_unlock_iothread();
1918 qemu_thread_join(cpu->thread);
1919 qemu_mutex_lock_iothread();
1920 }
1921
1922 /* For temporary buffers for forming a name */
1923 #define VCPU_THREAD_NAME_SIZE 16
1924
1925 static void qemu_tcg_init_vcpu(CPUState *cpu)
1926 {
1927 char thread_name[VCPU_THREAD_NAME_SIZE];
1928 static QemuCond *single_tcg_halt_cond;
1929 static QemuThread *single_tcg_cpu_thread;
1930 static int tcg_region_inited;
1931
1932 assert(tcg_enabled());
1933 /*
1934 * Initialize TCG regions--once. Now is a good time, because:
1935 * (1) TCG's init context, prologue and target globals have been set up.
1936 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1937 * -accel flag is processed, so the check doesn't work then).
1938 */
1939 if (!tcg_region_inited) {
1940 tcg_region_inited = 1;
1941 tcg_region_init();
1942 }
1943
1944 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1945 cpu->thread = g_malloc0(sizeof(QemuThread));
1946 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1947 qemu_cond_init(cpu->halt_cond);
1948
1949 if (qemu_tcg_mttcg_enabled()) {
1950 /* create a thread per vCPU with TCG (MTTCG) */
1951 parallel_cpus = true;
1952 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1953 cpu->cpu_index);
1954
1955 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1956 cpu, QEMU_THREAD_JOINABLE);
1957
1958 } else {
1959 /* share a single thread for all cpus with TCG */
1960 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1961 qemu_thread_create(cpu->thread, thread_name,
1962 qemu_tcg_rr_cpu_thread_fn,
1963 cpu, QEMU_THREAD_JOINABLE);
1964
1965 single_tcg_halt_cond = cpu->halt_cond;
1966 single_tcg_cpu_thread = cpu->thread;
1967 }
1968 #ifdef _WIN32
1969 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1970 #endif
1971 } else {
1972 /* For non-MTTCG cases we share the thread */
1973 cpu->thread = single_tcg_cpu_thread;
1974 cpu->halt_cond = single_tcg_halt_cond;
1975 cpu->thread_id = first_cpu->thread_id;
1976 cpu->can_do_io = 1;
1977 cpu->created = true;
1978 }
1979 }
1980
1981 static void qemu_hax_start_vcpu(CPUState *cpu)
1982 {
1983 char thread_name[VCPU_THREAD_NAME_SIZE];
1984
1985 cpu->thread = g_malloc0(sizeof(QemuThread));
1986 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1987 qemu_cond_init(cpu->halt_cond);
1988
1989 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1990 cpu->cpu_index);
1991 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1992 cpu, QEMU_THREAD_JOINABLE);
1993 #ifdef _WIN32
1994 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1995 #endif
1996 }
1997
1998 static void qemu_kvm_start_vcpu(CPUState *cpu)
1999 {
2000 char thread_name[VCPU_THREAD_NAME_SIZE];
2001
2002 cpu->thread = g_malloc0(sizeof(QemuThread));
2003 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2004 qemu_cond_init(cpu->halt_cond);
2005 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2006 cpu->cpu_index);
2007 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2008 cpu, QEMU_THREAD_JOINABLE);
2009 }
2010
2011 static void qemu_hvf_start_vcpu(CPUState *cpu)
2012 {
2013 char thread_name[VCPU_THREAD_NAME_SIZE];
2014
2015 /* HVF currently does not support TCG, and only runs in
2016 * unrestricted-guest mode. */
2017 assert(hvf_enabled());
2018
2019 cpu->thread = g_malloc0(sizeof(QemuThread));
2020 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2021 qemu_cond_init(cpu->halt_cond);
2022
2023 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2024 cpu->cpu_index);
2025 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2026 cpu, QEMU_THREAD_JOINABLE);
2027 }
2028
2029 static void qemu_whpx_start_vcpu(CPUState *cpu)
2030 {
2031 char thread_name[VCPU_THREAD_NAME_SIZE];
2032
2033 cpu->thread = g_malloc0(sizeof(QemuThread));
2034 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2035 qemu_cond_init(cpu->halt_cond);
2036 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2037 cpu->cpu_index);
2038 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2039 cpu, QEMU_THREAD_JOINABLE);
2040 #ifdef _WIN32
2041 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2042 #endif
2043 }
2044
2045 static void qemu_dummy_start_vcpu(CPUState *cpu)
2046 {
2047 char thread_name[VCPU_THREAD_NAME_SIZE];
2048
2049 cpu->thread = g_malloc0(sizeof(QemuThread));
2050 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2051 qemu_cond_init(cpu->halt_cond);
2052 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2053 cpu->cpu_index);
2054 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2055 QEMU_THREAD_JOINABLE);
2056 }
2057
2058 void qemu_init_vcpu(CPUState *cpu)
2059 {
2060 MachineState *ms = MACHINE(qdev_get_machine());
2061
2062 cpu->nr_cores = ms->smp.cores;
2063 cpu->nr_threads = ms->smp.threads;
2064 cpu->stopped = true;
2065 cpu->random_seed = qemu_guest_random_seed_thread_part1();
2066
2067 if (!cpu->as) {
2068 /* If the target cpu hasn't set up any address spaces itself,
2069 * give it the default one.
2070 */
2071 cpu->num_ases = 1;
2072 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2073 }
2074
2075 if (kvm_enabled()) {
2076 qemu_kvm_start_vcpu(cpu);
2077 } else if (hax_enabled()) {
2078 qemu_hax_start_vcpu(cpu);
2079 } else if (hvf_enabled()) {
2080 qemu_hvf_start_vcpu(cpu);
2081 } else if (tcg_enabled()) {
2082 qemu_tcg_init_vcpu(cpu);
2083 } else if (whpx_enabled()) {
2084 qemu_whpx_start_vcpu(cpu);
2085 } else {
2086 qemu_dummy_start_vcpu(cpu);
2087 }
2088
2089 while (!cpu->created) {
2090 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2091 }
2092 }
2093
2094 void cpu_stop_current(void)
2095 {
2096 if (current_cpu) {
2097 current_cpu->stop = true;
2098 cpu_exit(current_cpu);
2099 }
2100 }
2101
2102 int vm_stop(RunState state)
2103 {
2104 if (qemu_in_vcpu_thread()) {
2105 qemu_system_vmstop_request_prepare();
2106 qemu_system_vmstop_request(state);
2107 /*
2108 * FIXME: should not return to device code in case
2109 * vm_stop() has been requested.
2110 */
2111 cpu_stop_current();
2112 return 0;
2113 }
2114
2115 return do_vm_stop(state, true);
2116 }
2117
2118 /**
2119 * Prepare for (re)starting the VM.
2120 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2121 * running or in case of an error condition), 0 otherwise.
2122 */
2123 int vm_prepare_start(void)
2124 {
2125 RunState requested;
2126
2127 qemu_vmstop_requested(&requested);
2128 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2129 return -1;
2130 }
2131
2132 /* Ensure that a STOP/RESUME pair of events is emitted if a
2133 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2134 * example, according to documentation is always followed by
2135 * the STOP event.
2136 */
2137 if (runstate_is_running()) {
2138 qapi_event_send_stop();
2139 qapi_event_send_resume();
2140 return -1;
2141 }
2142
2143 /* We are sending this now, but the CPUs will be resumed shortly later */
2144 qapi_event_send_resume();
2145
2146 cpu_enable_ticks();
2147 runstate_set(RUN_STATE_RUNNING);
2148 vm_state_notify(1, RUN_STATE_RUNNING);
2149 return 0;
2150 }
2151
/* Start the VM: resume all vCPUs if vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
2158
2159 /* does a state transition even if the VM is already stopped,
2160 current state is forgotten forever */
2161 int vm_stop_force_state(RunState state)
2162 {
2163 if (runstate_is_running()) {
2164 return vm_stop(state);
2165 } else {
2166 runstate_set(state);
2167
2168 bdrv_drain_all();
2169 /* Make sure to return an error if the flush in a previous vm_stop()
2170 * failed. */
2171 return bdrv_flush_all();
2172 }
2173 }
2174
/*
 * Print the list of CPU models when the target defines cpu_list;
 * otherwise do nothing.  @optarg is not used.
 */
void list_cpus(const char *optarg)
{
    /* XXX: targets still lacking xxx_cpu_list need to implement it */
#ifdef cpu_list
    cpu_list();
#endif
}
2182
2183 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2184 bool has_cpu, int64_t cpu_index, Error **errp)
2185 {
2186 FILE *f;
2187 uint32_t l;
2188 CPUState *cpu;
2189 uint8_t buf[1024];
2190 int64_t orig_addr = addr, orig_size = size;
2191
2192 if (!has_cpu) {
2193 cpu_index = 0;
2194 }
2195
2196 cpu = qemu_get_cpu(cpu_index);
2197 if (cpu == NULL) {
2198 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2199 "a CPU number");
2200 return;
2201 }
2202
2203 f = fopen(filename, "wb");
2204 if (!f) {
2205 error_setg_file_open(errp, errno, filename);
2206 return;
2207 }
2208
2209 while (size != 0) {
2210 l = sizeof(buf);
2211 if (l > size)
2212 l = size;
2213 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2214 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2215 " specified", orig_addr, orig_size);
2216 goto exit;
2217 }
2218 if (fwrite(buf, 1, l, f) != l) {
2219 error_setg(errp, QERR_IO_ERROR);
2220 goto exit;
2221 }
2222 addr += l;
2223 size -= l;
2224 }
2225
2226 exit:
2227 fclose(f);
2228 }
2229
2230 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2231 Error **errp)
2232 {
2233 FILE *f;
2234 uint32_t l;
2235 uint8_t buf[1024];
2236
2237 f = fopen(filename, "wb");
2238 if (!f) {
2239 error_setg_file_open(errp, errno, filename);
2240 return;
2241 }
2242
2243 while (size != 0) {
2244 l = sizeof(buf);
2245 if (l > size)
2246 l = size;
2247 cpu_physical_memory_read(addr, buf, l);
2248 if (fwrite(buf, 1, l, f) != l) {
2249 error_setg(errp, QERR_IO_ERROR);
2250 goto exit;
2251 }
2252 addr += l;
2253 size -= l;
2254 }
2255
2256 exit:
2257 fclose(f);
2258 }
2259
2260 void qmp_inject_nmi(Error **errp)
2261 {
2262 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2263 }
2264
2265 void dump_drift_info(void)
2266 {
2267 if (!use_icount) {
2268 return;
2269 }
2270
2271 qemu_printf("Host - Guest clock %"PRIi64" ms\n",
2272 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2273 if (icount_align_option) {
2274 qemu_printf("Max guest delay %"PRIi64" ms\n",
2275 -max_delay / SCALE_MS);
2276 qemu_printf("Max guest advance %"PRIi64" ms\n",
2277 max_advance / SCALE_MS);
2278 } else {
2279 qemu_printf("Max guest delay NA\n");
2280 qemu_printf("Max guest advance NA\n");
2281 }
2282 }