target/riscv: vmfirst find-first-set mask bit
[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu/osdep.h"
26 #include "qemu-common.h"
27 #include "qemu/config-file.h"
28 #include "qemu/cutils.h"
29 #include "migration/vmstate.h"
30 #include "monitor/monitor.h"
31 #include "qapi/error.h"
32 #include "qapi/qapi-commands-misc.h"
33 #include "qapi/qapi-events-run-state.h"
34 #include "qapi/qmp/qerror.h"
35 #include "qemu/error-report.h"
36 #include "qemu/qemu-print.h"
37 #include "sysemu/tcg.h"
38 #include "sysemu/block-backend.h"
39 #include "exec/gdbstub.h"
40 #include "sysemu/dma.h"
41 #include "sysemu/hw_accel.h"
42 #include "sysemu/kvm.h"
43 #include "sysemu/hax.h"
44 #include "sysemu/hvf.h"
45 #include "sysemu/whpx.h"
46 #include "exec/exec-all.h"
47
48 #include "qemu/thread.h"
49 #include "qemu/plugin.h"
50 #include "sysemu/cpus.h"
51 #include "sysemu/qtest.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/option.h"
54 #include "qemu/bitmap.h"
55 #include "qemu/seqlock.h"
56 #include "qemu/guest-random.h"
57 #include "tcg/tcg.h"
58 #include "hw/nmi.h"
59 #include "sysemu/replay.h"
60 #include "sysemu/runstate.h"
61 #include "hw/boards.h"
62 #include "hw/hw.h"
63
64 #ifdef CONFIG_LINUX
65
66 #include <sys/prctl.h>
67
68 #ifndef PR_MCE_KILL
69 #define PR_MCE_KILL 33
70 #endif
71
72 #ifndef PR_MCE_KILL_SET
73 #define PR_MCE_KILL_SET 1
74 #endif
75
76 #ifndef PR_MCE_KILL_EARLY
77 #define PR_MCE_KILL_EARLY 1
78 #endif
79
80 #endif /* CONFIG_LINUX */
81
/*
 * Global mutex of this file: it is the mutex passed to do_run_on_cpu()
 * by run_on_cpu() and the one the throttle thread hands to
 * qemu_cond_timedwait() while sleeping.
 */
static QemuMutex qemu_global_mutex;

/*
 * NOTE(review): neither variable is referenced in the visible part of
 * this file - presumably limits used by the icount "align" feature;
 * confirm against the users elsewhere.
 */
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
/* Timer whose callback is cpu_throttle_timer_tick() (see cpu_ticks_init) */
static QEMUTimer *throttle_timer;
/* Current throttle percentage; 0 means throttling is not active */
static unsigned int throttle_percentage;

/* Range that cpu_throttle_set() clamps the requested percentage to */
#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
/* Base time slice, in nanoseconds, used to size sleep and timer periods */
#define CPU_THROTTLE_TIMESLICE_NS 10000000
94
95 bool cpu_is_stopped(CPUState *cpu)
96 {
97 return cpu->stopped || !runstate_is_running();
98 }
99
100 static inline bool cpu_work_list_empty(CPUState *cpu)
101 {
102 bool ret;
103
104 qemu_mutex_lock(&cpu->work_mutex);
105 ret = QSIMPLEQ_EMPTY(&cpu->work_list);
106 qemu_mutex_unlock(&cpu->work_mutex);
107 return ret;
108 }
109
110 static bool cpu_thread_is_idle(CPUState *cpu)
111 {
112 if (cpu->stop || !cpu_work_list_empty(cpu)) {
113 return false;
114 }
115 if (cpu_is_stopped(cpu)) {
116 return true;
117 }
118 if (!cpu->halted || cpu_has_work(cpu) ||
119 kvm_halt_in_kernel()) {
120 return false;
121 }
122 return true;
123 }
124
125 static bool all_cpu_threads_idle(void)
126 {
127 CPUState *cpu;
128
129 CPU_FOREACH(cpu) {
130 if (!cpu_thread_is_idle(cpu)) {
131 return false;
132 }
133 }
134 return true;
135 }
136
137 /***********************************************************/
138 /* guest cycle counter */
139
/* Protected by TimersState seqlock */

/*
 * Mirrors the "sleep" sub-option parsed by configure_icount().  When
 * false, qemu_start_warp_timer() jumps the virtual clock straight to the
 * next deadline instead of arming the warp timer, and
 * qemu_account_warp_timer() does nothing.
 */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
/*
 * Upper bound for icount_time_shift, i.e. for the left shift that
 * cpu_icount_to_ns() applies to an instruction count.
 */
#define MAX_ICOUNT_SHIFT 10
145
/*
 * State behind the clocks implemented in this file: offsets applied to
 * the host tick counter and host clock, plus the instruction counter,
 * its bias and the timers used when icount is enabled.
 */
typedef struct TimersState {
    /* Protected by BQL.  */
    /* Last value handed out by cpu_get_ticks_locked() */
    int64_t cpu_ticks_prev;
    /* Offset added to the host tick counter */
    int64_t cpu_ticks_offset;

    /* Protect fields that can be respectively read outside the
     * BQL, and written from multiple threads.
     */
    QemuSeqLock vm_clock_seqlock;
    QemuSpin vm_clock_lock;

    /* Non-zero between cpu_enable_ticks() and cpu_disable_ticks() */
    int16_t cpu_ticks_enabled;

    /* Conversion factor from emulated instructions to virtual clock ticks.  */
    int16_t icount_time_shift;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;

    /*
     * QEMU_CLOCK_VIRTUAL_RT time recorded by qemu_start_warp_timer();
     * -1 means icount_warp_rt() has nothing to account for.
     */
    int64_t vm_clock_warp_start;
    /* Offset added to get_clock() by cpu_get_clock_locked() */
    int64_t cpu_clock_offset;

    /* Only written by TCG thread */
    int64_t qemu_icount;

    /* for adjusting icount */
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;

/* The single instance; also registered for migration in cpu_ticks_init() */
static TimersState timers_state;
/* Asserted to be false by the round-robin kick timer helpers in this file */
bool mttcg_enabled;
179
180
181 /* The current number of executed instructions is based on what we
182 * originally budgeted minus the current state of the decrementing
183 * icount counters in extra/u16.low.
184 */
185 static int64_t cpu_get_icount_executed(CPUState *cpu)
186 {
187 return (cpu->icount_budget -
188 (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
189 }
190
191 /*
192 * Update the global shared timer_state.qemu_icount to take into
193 * account executed instructions. This is done by the TCG vCPU
194 * thread so the main-loop can see time has moved forward.
195 */
196 static void cpu_update_icount_locked(CPUState *cpu)
197 {
198 int64_t executed = cpu_get_icount_executed(cpu);
199 cpu->icount_budget -= executed;
200
201 atomic_set_i64(&timers_state.qemu_icount,
202 timers_state.qemu_icount + executed);
203 }
204
205 /*
206 * Update the global shared timer_state.qemu_icount to take into
207 * account executed instructions. This is done by the TCG vCPU
208 * thread so the main-loop can see time has moved forward.
209 */
210 void cpu_update_icount(CPUState *cpu)
211 {
212 seqlock_write_lock(&timers_state.vm_clock_seqlock,
213 &timers_state.vm_clock_lock);
214 cpu_update_icount_locked(cpu);
215 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
216 &timers_state.vm_clock_lock);
217 }
218
219 static int64_t cpu_get_icount_raw_locked(void)
220 {
221 CPUState *cpu = current_cpu;
222
223 if (cpu && cpu->running) {
224 if (!cpu->can_do_io) {
225 error_report("Bad icount read");
226 exit(1);
227 }
228 /* Take into account what has run */
229 cpu_update_icount_locked(cpu);
230 }
231 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
232 return atomic_read_i64(&timers_state.qemu_icount);
233 }
234
235 static int64_t cpu_get_icount_locked(void)
236 {
237 int64_t icount = cpu_get_icount_raw_locked();
238 return atomic_read_i64(&timers_state.qemu_icount_bias) +
239 cpu_icount_to_ns(icount);
240 }
241
242 int64_t cpu_get_icount_raw(void)
243 {
244 int64_t icount;
245 unsigned start;
246
247 do {
248 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
249 icount = cpu_get_icount_raw_locked();
250 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
251
252 return icount;
253 }
254
255 /* Return the virtual CPU time, based on the instruction counter. */
256 int64_t cpu_get_icount(void)
257 {
258 int64_t icount;
259 unsigned start;
260
261 do {
262 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
263 icount = cpu_get_icount_locked();
264 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
265
266 return icount;
267 }
268
269 int64_t cpu_icount_to_ns(int64_t icount)
270 {
271 return icount << atomic_read(&timers_state.icount_time_shift);
272 }
273
274 static int64_t cpu_get_ticks_locked(void)
275 {
276 int64_t ticks = timers_state.cpu_ticks_offset;
277 if (timers_state.cpu_ticks_enabled) {
278 ticks += cpu_get_host_ticks();
279 }
280
281 if (timers_state.cpu_ticks_prev > ticks) {
282 /* Non increasing ticks may happen if the host uses software suspend. */
283 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
284 ticks = timers_state.cpu_ticks_prev;
285 }
286
287 timers_state.cpu_ticks_prev = ticks;
288 return ticks;
289 }
290
291 /* return the time elapsed in VM between vm_start and vm_stop. Unless
292 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
293 * counter.
294 */
295 int64_t cpu_get_ticks(void)
296 {
297 int64_t ticks;
298
299 if (use_icount) {
300 return cpu_get_icount();
301 }
302
303 qemu_spin_lock(&timers_state.vm_clock_lock);
304 ticks = cpu_get_ticks_locked();
305 qemu_spin_unlock(&timers_state.vm_clock_lock);
306 return ticks;
307 }
308
309 static int64_t cpu_get_clock_locked(void)
310 {
311 int64_t time;
312
313 time = timers_state.cpu_clock_offset;
314 if (timers_state.cpu_ticks_enabled) {
315 time += get_clock();
316 }
317
318 return time;
319 }
320
321 /* Return the monotonic time elapsed in VM, i.e.,
322 * the time between vm_start and vm_stop
323 */
324 int64_t cpu_get_clock(void)
325 {
326 int64_t ti;
327 unsigned start;
328
329 do {
330 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
331 ti = cpu_get_clock_locked();
332 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
333
334 return ti;
335 }
336
337 /* enable cpu_get_ticks()
338 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
339 */
340 void cpu_enable_ticks(void)
341 {
342 seqlock_write_lock(&timers_state.vm_clock_seqlock,
343 &timers_state.vm_clock_lock);
344 if (!timers_state.cpu_ticks_enabled) {
345 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
346 timers_state.cpu_clock_offset -= get_clock();
347 timers_state.cpu_ticks_enabled = 1;
348 }
349 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
350 &timers_state.vm_clock_lock);
351 }
352
353 /* disable cpu_get_ticks() : the clock is stopped. You must not call
354 * cpu_get_ticks() after that.
355 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
356 */
357 void cpu_disable_ticks(void)
358 {
359 seqlock_write_lock(&timers_state.vm_clock_seqlock,
360 &timers_state.vm_clock_lock);
361 if (timers_state.cpu_ticks_enabled) {
362 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
363 timers_state.cpu_clock_offset = cpu_get_clock_locked();
364 timers_state.cpu_ticks_enabled = 0;
365 }
366 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
367 &timers_state.vm_clock_lock);
368 }
369
370 /* Correlation between real and virtual time is always going to be
371 fairly approximate, so ignore small variation.
372 When the guest is idle real and virtual time will be aligned in
373 the IO wait loop. */
374 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
375
376 static void icount_adjust(void)
377 {
378 int64_t cur_time;
379 int64_t cur_icount;
380 int64_t delta;
381
382 /* Protected by TimersState mutex. */
383 static int64_t last_delta;
384
385 /* If the VM is not running, then do nothing. */
386 if (!runstate_is_running()) {
387 return;
388 }
389
390 seqlock_write_lock(&timers_state.vm_clock_seqlock,
391 &timers_state.vm_clock_lock);
392 cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
393 cpu_get_clock_locked());
394 cur_icount = cpu_get_icount_locked();
395
396 delta = cur_icount - cur_time;
397 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
398 if (delta > 0
399 && last_delta + ICOUNT_WOBBLE < delta * 2
400 && timers_state.icount_time_shift > 0) {
401 /* The guest is getting too far ahead. Slow time down. */
402 atomic_set(&timers_state.icount_time_shift,
403 timers_state.icount_time_shift - 1);
404 }
405 if (delta < 0
406 && last_delta - ICOUNT_WOBBLE > delta * 2
407 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
408 /* The guest is getting too far behind. Speed time up. */
409 atomic_set(&timers_state.icount_time_shift,
410 timers_state.icount_time_shift + 1);
411 }
412 last_delta = delta;
413 atomic_set_i64(&timers_state.qemu_icount_bias,
414 cur_icount - (timers_state.qemu_icount
415 << timers_state.icount_time_shift));
416 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
417 &timers_state.vm_clock_lock);
418 }
419
420 static void icount_adjust_rt(void *opaque)
421 {
422 timer_mod(timers_state.icount_rt_timer,
423 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
424 icount_adjust();
425 }
426
427 static void icount_adjust_vm(void *opaque)
428 {
429 timer_mod(timers_state.icount_vm_timer,
430 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
431 NANOSECONDS_PER_SECOND / 10);
432 icount_adjust();
433 }
434
435 static int64_t qemu_icount_round(int64_t count)
436 {
437 int shift = atomic_read(&timers_state.icount_time_shift);
438 return (count + (1 << shift) - 1) >> shift;
439 }
440
441 static void icount_warp_rt(void)
442 {
443 unsigned seq;
444 int64_t warp_start;
445
446 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
447 * changes from -1 to another value, so the race here is okay.
448 */
449 do {
450 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
451 warp_start = timers_state.vm_clock_warp_start;
452 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
453
454 if (warp_start == -1) {
455 return;
456 }
457
458 seqlock_write_lock(&timers_state.vm_clock_seqlock,
459 &timers_state.vm_clock_lock);
460 if (runstate_is_running()) {
461 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
462 cpu_get_clock_locked());
463 int64_t warp_delta;
464
465 warp_delta = clock - timers_state.vm_clock_warp_start;
466 if (use_icount == 2) {
467 /*
468 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
469 * far ahead of real time.
470 */
471 int64_t cur_icount = cpu_get_icount_locked();
472 int64_t delta = clock - cur_icount;
473 warp_delta = MIN(warp_delta, delta);
474 }
475 atomic_set_i64(&timers_state.qemu_icount_bias,
476 timers_state.qemu_icount_bias + warp_delta);
477 }
478 timers_state.vm_clock_warp_start = -1;
479 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
480 &timers_state.vm_clock_lock);
481
482 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
483 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
484 }
485 }
486
/*
 * Callback of icount_warp_timer: simply accounts the warp.
 *
 * No need for a checkpoint because the timer already synchronizes
 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
 */
static void icount_timer_cb(void *opaque)
{
    icount_warp_rt();
}
494
495 void qtest_clock_warp(int64_t dest)
496 {
497 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
498 AioContext *aio_context;
499 assert(qtest_enabled());
500 aio_context = qemu_get_aio_context();
501 while (clock < dest) {
502 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
503 QEMU_TIMER_ATTR_ALL);
504 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
505
506 seqlock_write_lock(&timers_state.vm_clock_seqlock,
507 &timers_state.vm_clock_lock);
508 atomic_set_i64(&timers_state.qemu_icount_bias,
509 timers_state.qemu_icount_bias + warp);
510 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
511 &timers_state.vm_clock_lock);
512
513 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
514 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
515 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
516 }
517 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
518 }
519
520 void qemu_start_warp_timer(void)
521 {
522 int64_t clock;
523 int64_t deadline;
524
525 if (!use_icount) {
526 return;
527 }
528
529 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
530 * do not fire, so computing the deadline does not make sense.
531 */
532 if (!runstate_is_running()) {
533 return;
534 }
535
536 if (replay_mode != REPLAY_MODE_PLAY) {
537 if (!all_cpu_threads_idle()) {
538 return;
539 }
540
541 if (qtest_enabled()) {
542 /* When testing, qtest commands advance icount. */
543 return;
544 }
545
546 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
547 } else {
548 /* warp clock deterministically in record/replay mode */
549 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
550 /* vCPU is sleeping and warp can't be started.
551 It is probably a race condition: notification sent
552 to vCPU was processed in advance and vCPU went to sleep.
553 Therefore we have to wake it up for doing someting. */
554 if (replay_has_checkpoint()) {
555 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
556 }
557 return;
558 }
559 }
560
561 /* We want to use the earliest deadline from ALL vm_clocks */
562 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
563 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
564 ~QEMU_TIMER_ATTR_EXTERNAL);
565 if (deadline < 0) {
566 static bool notified;
567 if (!icount_sleep && !notified) {
568 warn_report("icount sleep disabled and no active timers");
569 notified = true;
570 }
571 return;
572 }
573
574 if (deadline > 0) {
575 /*
576 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
577 * sleep. Otherwise, the CPU might be waiting for a future timer
578 * interrupt to wake it up, but the interrupt never comes because
579 * the vCPU isn't running any insns and thus doesn't advance the
580 * QEMU_CLOCK_VIRTUAL.
581 */
582 if (!icount_sleep) {
583 /*
584 * We never let VCPUs sleep in no sleep icount mode.
585 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
586 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
587 * It is useful when we want a deterministic execution time,
588 * isolated from host latencies.
589 */
590 seqlock_write_lock(&timers_state.vm_clock_seqlock,
591 &timers_state.vm_clock_lock);
592 atomic_set_i64(&timers_state.qemu_icount_bias,
593 timers_state.qemu_icount_bias + deadline);
594 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
595 &timers_state.vm_clock_lock);
596 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
597 } else {
598 /*
599 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
600 * "real" time, (related to the time left until the next event) has
601 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
602 * This avoids that the warps are visible externally; for example,
603 * you will not be sending network packets continuously instead of
604 * every 100ms.
605 */
606 seqlock_write_lock(&timers_state.vm_clock_seqlock,
607 &timers_state.vm_clock_lock);
608 if (timers_state.vm_clock_warp_start == -1
609 || timers_state.vm_clock_warp_start > clock) {
610 timers_state.vm_clock_warp_start = clock;
611 }
612 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
613 &timers_state.vm_clock_lock);
614 timer_mod_anticipate(timers_state.icount_warp_timer,
615 clock + deadline);
616 }
617 } else if (deadline == 0) {
618 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
619 }
620 }
621
622 static void qemu_account_warp_timer(void)
623 {
624 if (!use_icount || !icount_sleep) {
625 return;
626 }
627
628 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
629 * do not fire, so computing the deadline does not make sense.
630 */
631 if (!runstate_is_running()) {
632 return;
633 }
634
635 /* warp clock deterministically in record/replay mode */
636 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
637 return;
638 }
639
640 timer_del(timers_state.icount_warp_timer);
641 icount_warp_rt();
642 }
643
644 static bool icount_state_needed(void *opaque)
645 {
646 return use_icount;
647 }
648
649 static bool warp_timer_state_needed(void *opaque)
650 {
651 TimersState *s = opaque;
652 return s->icount_warp_timer != NULL;
653 }
654
655 static bool adjust_timers_state_needed(void *opaque)
656 {
657 TimersState *s = opaque;
658 return s->icount_rt_timer != NULL;
659 }
660
661 static bool shift_state_needed(void *opaque)
662 {
663 return use_icount == 2;
664 }
665
666 /*
667 * Subsection for warp timer migration is optional, because may not be created
668 */
669 static const VMStateDescription icount_vmstate_warp_timer = {
670 .name = "timer/icount/warp_timer",
671 .version_id = 1,
672 .minimum_version_id = 1,
673 .needed = warp_timer_state_needed,
674 .fields = (VMStateField[]) {
675 VMSTATE_INT64(vm_clock_warp_start, TimersState),
676 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
677 VMSTATE_END_OF_LIST()
678 }
679 };
680
681 static const VMStateDescription icount_vmstate_adjust_timers = {
682 .name = "timer/icount/timers",
683 .version_id = 1,
684 .minimum_version_id = 1,
685 .needed = adjust_timers_state_needed,
686 .fields = (VMStateField[]) {
687 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
688 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
689 VMSTATE_END_OF_LIST()
690 }
691 };
692
693 static const VMStateDescription icount_vmstate_shift = {
694 .name = "timer/icount/shift",
695 .version_id = 1,
696 .minimum_version_id = 1,
697 .needed = shift_state_needed,
698 .fields = (VMStateField[]) {
699 VMSTATE_INT16(icount_time_shift, TimersState),
700 VMSTATE_END_OF_LIST()
701 }
702 };
703
704 /*
705 * This is a subsection for icount migration.
706 */
707 static const VMStateDescription icount_vmstate_timers = {
708 .name = "timer/icount",
709 .version_id = 1,
710 .minimum_version_id = 1,
711 .needed = icount_state_needed,
712 .fields = (VMStateField[]) {
713 VMSTATE_INT64(qemu_icount_bias, TimersState),
714 VMSTATE_INT64(qemu_icount, TimersState),
715 VMSTATE_END_OF_LIST()
716 },
717 .subsections = (const VMStateDescription*[]) {
718 &icount_vmstate_warp_timer,
719 &icount_vmstate_adjust_timers,
720 &icount_vmstate_shift,
721 NULL
722 }
723 };
724
725 static const VMStateDescription vmstate_timers = {
726 .name = "timer",
727 .version_id = 2,
728 .minimum_version_id = 1,
729 .fields = (VMStateField[]) {
730 VMSTATE_INT64(cpu_ticks_offset, TimersState),
731 VMSTATE_UNUSED(8),
732 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
733 VMSTATE_END_OF_LIST()
734 },
735 .subsections = (const VMStateDescription*[]) {
736 &icount_vmstate_timers,
737 NULL
738 }
739 };
740
741 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
742 {
743 double pct;
744 double throttle_ratio;
745 int64_t sleeptime_ns, endtime_ns;
746
747 if (!cpu_throttle_get_percentage()) {
748 return;
749 }
750
751 pct = (double)cpu_throttle_get_percentage()/100;
752 throttle_ratio = pct / (1 - pct);
753 /* Add 1ns to fix double's rounding error (like 0.9999999...) */
754 sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
755 endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
756 while (sleeptime_ns > 0 && !cpu->stop) {
757 if (sleeptime_ns > SCALE_MS) {
758 qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
759 sleeptime_ns / SCALE_MS);
760 } else {
761 qemu_mutex_unlock_iothread();
762 g_usleep(sleeptime_ns / SCALE_US);
763 qemu_mutex_lock_iothread();
764 }
765 sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
766 }
767 atomic_set(&cpu->throttle_thread_scheduled, 0);
768 }
769
770 static void cpu_throttle_timer_tick(void *opaque)
771 {
772 CPUState *cpu;
773 double pct;
774
775 /* Stop the timer if needed */
776 if (!cpu_throttle_get_percentage()) {
777 return;
778 }
779 CPU_FOREACH(cpu) {
780 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
781 async_run_on_cpu(cpu, cpu_throttle_thread,
782 RUN_ON_CPU_NULL);
783 }
784 }
785
786 pct = (double)cpu_throttle_get_percentage()/100;
787 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
788 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
789 }
790
791 void cpu_throttle_set(int new_throttle_pct)
792 {
793 /* Ensure throttle percentage is within valid range */
794 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
795 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
796
797 atomic_set(&throttle_percentage, new_throttle_pct);
798
799 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
800 CPU_THROTTLE_TIMESLICE_NS);
801 }
802
803 void cpu_throttle_stop(void)
804 {
805 atomic_set(&throttle_percentage, 0);
806 }
807
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
812
813 int cpu_throttle_get_percentage(void)
814 {
815 return atomic_read(&throttle_percentage);
816 }
817
818 void cpu_ticks_init(void)
819 {
820 seqlock_init(&timers_state.vm_clock_seqlock);
821 qemu_spin_init(&timers_state.vm_clock_lock);
822 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
823 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
824 cpu_throttle_timer_tick, NULL);
825 }
826
827 void configure_icount(QemuOpts *opts, Error **errp)
828 {
829 const char *option = qemu_opt_get(opts, "shift");
830 bool sleep = qemu_opt_get_bool(opts, "sleep", true);
831 bool align = qemu_opt_get_bool(opts, "align", false);
832 long time_shift = -1;
833
834 if (!option) {
835 if (qemu_opt_get(opts, "align") != NULL) {
836 error_setg(errp, "Please specify shift option when using align");
837 }
838 return;
839 }
840
841 if (align && !sleep) {
842 error_setg(errp, "align=on and sleep=off are incompatible");
843 return;
844 }
845
846 if (strcmp(option, "auto") != 0) {
847 if (qemu_strtol(option, NULL, 0, &time_shift) < 0
848 || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
849 error_setg(errp, "icount: Invalid shift value");
850 return;
851 }
852 } else if (icount_align_option) {
853 error_setg(errp, "shift=auto and align=on are incompatible");
854 return;
855 } else if (!icount_sleep) {
856 error_setg(errp, "shift=auto and sleep=off are incompatible");
857 return;
858 }
859
860 icount_sleep = sleep;
861 if (icount_sleep) {
862 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
863 icount_timer_cb, NULL);
864 }
865
866 icount_align_option = align;
867
868 if (time_shift >= 0) {
869 timers_state.icount_time_shift = time_shift;
870 use_icount = 1;
871 return;
872 }
873
874 use_icount = 2;
875
876 /* 125MIPS seems a reasonable initial guess at the guest speed.
877 It will be corrected fairly quickly anyway. */
878 timers_state.icount_time_shift = 3;
879
880 /* Have both realtime and virtual time triggers for speed adjustment.
881 The realtime trigger catches emulated time passing too slowly,
882 the virtual time trigger catches emulated time passing too fast.
883 Realtime triggers occur even when idle, so use them less frequently
884 than VM triggers. */
885 timers_state.vm_clock_warp_start = -1;
886 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
887 icount_adjust_rt, NULL);
888 timer_mod(timers_state.icount_rt_timer,
889 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
890 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
891 icount_adjust_vm, NULL);
892 timer_mod(timers_state.icount_vm_timer,
893 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
894 NANOSECONDS_PER_SECOND / 10);
895 }
896
/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running, a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */
908
/* Created by start_tcg_kick_timer() only if a second vCPU exists */
static QEMUTimer *tcg_kick_vcpu_timer;
/* vCPU that qemu_cpu_kick_rr_next_cpu() kicks; read with atomic_mb_read() */
static CPUState *tcg_current_rr_cpu;

/* Interval between kicks: a tenth of a second of QEMU_CLOCK_VIRTUAL time */
#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
913
914 static inline int64_t qemu_tcg_next_kick(void)
915 {
916 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
917 }
918
919 /* Kick the currently round-robin scheduled vCPU to next */
920 static void qemu_cpu_kick_rr_next_cpu(void)
921 {
922 CPUState *cpu;
923 do {
924 cpu = atomic_mb_read(&tcg_current_rr_cpu);
925 if (cpu) {
926 cpu_exit(cpu);
927 }
928 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
929 }
930
931 /* Kick all RR vCPUs */
932 static void qemu_cpu_kick_rr_cpus(void)
933 {
934 CPUState *cpu;
935
936 CPU_FOREACH(cpu) {
937 cpu_exit(cpu);
938 };
939 }
940
941 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
942 {
943 }
944
945 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
946 {
947 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
948 qemu_notify_event();
949 return;
950 }
951
952 if (qemu_in_vcpu_thread()) {
953 /* A CPU is currently running; kick it back out to the
954 * tcg_cpu_exec() loop so it will recalculate its
955 * icount deadline immediately.
956 */
957 qemu_cpu_kick(current_cpu);
958 } else if (first_cpu) {
959 /* qemu_cpu_kick is not enough to kick a halted CPU out of
960 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
961 * causes cpu_thread_is_idle to return false. This way,
962 * handle_icount_deadline can run.
963 * If we have no CPUs at all for some reason, we don't
964 * need to do anything.
965 */
966 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
967 }
968 }
969
970 static void kick_tcg_thread(void *opaque)
971 {
972 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
973 qemu_cpu_kick_rr_next_cpu();
974 }
975
976 static void start_tcg_kick_timer(void)
977 {
978 assert(!mttcg_enabled);
979 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
980 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
981 kick_tcg_thread, NULL);
982 }
983 if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
984 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
985 }
986 }
987
988 static void stop_tcg_kick_timer(void)
989 {
990 assert(!mttcg_enabled);
991 if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
992 timer_del(tcg_kick_vcpu_timer);
993 }
994 }
995
996 /***********************************************************/
997 void hw_error(const char *fmt, ...)
998 {
999 va_list ap;
1000 CPUState *cpu;
1001
1002 va_start(ap, fmt);
1003 fprintf(stderr, "qemu: hardware error: ");
1004 vfprintf(stderr, fmt, ap);
1005 fprintf(stderr, "\n");
1006 CPU_FOREACH(cpu) {
1007 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
1008 cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
1009 }
1010 va_end(ap);
1011 abort();
1012 }
1013
1014 void cpu_synchronize_all_states(void)
1015 {
1016 CPUState *cpu;
1017
1018 CPU_FOREACH(cpu) {
1019 cpu_synchronize_state(cpu);
1020 /* TODO: move to cpu_synchronize_state() */
1021 if (hvf_enabled()) {
1022 hvf_cpu_synchronize_state(cpu);
1023 }
1024 }
1025 }
1026
1027 void cpu_synchronize_all_post_reset(void)
1028 {
1029 CPUState *cpu;
1030
1031 CPU_FOREACH(cpu) {
1032 cpu_synchronize_post_reset(cpu);
1033 /* TODO: move to cpu_synchronize_post_reset() */
1034 if (hvf_enabled()) {
1035 hvf_cpu_synchronize_post_reset(cpu);
1036 }
1037 }
1038 }
1039
1040 void cpu_synchronize_all_post_init(void)
1041 {
1042 CPUState *cpu;
1043
1044 CPU_FOREACH(cpu) {
1045 cpu_synchronize_post_init(cpu);
1046 /* TODO: move to cpu_synchronize_post_init() */
1047 if (hvf_enabled()) {
1048 hvf_cpu_synchronize_post_init(cpu);
1049 }
1050 }
1051 }
1052
1053 void cpu_synchronize_all_pre_loadvm(void)
1054 {
1055 CPUState *cpu;
1056
1057 CPU_FOREACH(cpu) {
1058 cpu_synchronize_pre_loadvm(cpu);
1059 }
1060 }
1061
1062 static int do_vm_stop(RunState state, bool send_stop)
1063 {
1064 int ret = 0;
1065
1066 if (runstate_is_running()) {
1067 runstate_set(state);
1068 cpu_disable_ticks();
1069 pause_all_vcpus();
1070 vm_state_notify(0, state);
1071 if (send_stop) {
1072 qapi_event_send_stop();
1073 }
1074 }
1075
1076 bdrv_drain_all();
1077 ret = bdrv_flush_all();
1078
1079 return ret;
1080 }
1081
1082 /* Special vm_stop() variant for terminating the process. Historically clients
1083 * did not expect a QMP STOP event and so we need to retain compatibility.
1084 */
1085 int vm_shutdown(void)
1086 {
1087 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1088 }
1089
1090 static bool cpu_can_run(CPUState *cpu)
1091 {
1092 if (cpu->stop) {
1093 return false;
1094 }
1095 if (cpu_is_stopped(cpu)) {
1096 return false;
1097 }
1098 return true;
1099 }
1100
1101 static void cpu_handle_guest_debug(CPUState *cpu)
1102 {
1103 gdb_set_stop_cpu(cpu);
1104 qemu_system_debug_request();
1105 cpu->stopped = true;
1106 }
1107
1108 #ifdef CONFIG_LINUX
1109 static void sigbus_reraise(void)
1110 {
1111 sigset_t set;
1112 struct sigaction action;
1113
1114 memset(&action, 0, sizeof(action));
1115 action.sa_handler = SIG_DFL;
1116 if (!sigaction(SIGBUS, &action, NULL)) {
1117 raise(SIGBUS);
1118 sigemptyset(&set);
1119 sigaddset(&set, SIGBUS);
1120 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1121 }
1122 perror("Failed to re-raise SIGBUS!\n");
1123 abort();
1124 }
1125
1126 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1127 {
1128 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1129 sigbus_reraise();
1130 }
1131
1132 if (current_cpu) {
1133 /* Called asynchronously in VCPU thread. */
1134 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1135 sigbus_reraise();
1136 }
1137 } else {
1138 /* Called synchronously (via signalfd) in main thread. */
1139 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1140 sigbus_reraise();
1141 }
1142 }
1143 }
1144
1145 static void qemu_init_sigbus(void)
1146 {
1147 struct sigaction action;
1148
1149 memset(&action, 0, sizeof(action));
1150 action.sa_flags = SA_SIGINFO;
1151 action.sa_sigaction = sigbus_handler;
1152 sigaction(SIGBUS, &action, NULL);
1153
1154 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1155 }
1156 #else /* !CONFIG_LINUX */
/* Non-Linux hosts: no SIGBUS handling is set up, so this is a no-op. */
static void qemu_init_sigbus(void)
{
}
1160 #endif /* !CONFIG_LINUX */
1161
/* Thread that called qemu_init_cpu_loop() */
static QemuThread io_thread;

/* cpu creation: signalled by vCPU threads when cpu->created changes */
static QemuCond qemu_cpu_cond;
/* system init: broadcast by qemu_cpu_stop(), waited on by pause_all_vcpus() */
static QemuCond qemu_pause_cond;
1168
/*
 * One-time set-up for the vCPU loops: installs the SIGBUS handling,
 * initialises the CPU-creation and pause condition variables and the
 * global mutex, and records the calling thread in io_thread.
 */
void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    /* Remember which thread performed the initialisation. */
    qemu_thread_get_self(&io_thread);
}
1178
/*
 * Run @func with @data on @cpu by delegating to do_run_on_cpu(),
 * passing the global mutex as the lock argument.
 */
void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}
1183
1184 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1185 {
1186 if (kvm_destroy_vcpu(cpu) < 0) {
1187 error_report("kvm_destroy_vcpu failed");
1188 exit(EXIT_FAILURE);
1189 }
1190 }
1191
/* TCG counterpart of qemu_kvm_destroy_vcpu(); currently does nothing. */
static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}
1195
/*
 * Move @cpu into the stopped state.  Must be called from the thread
 * that runs @cpu (asserted).  Clears the pending stop request, marks
 * the vCPU stopped, optionally calls cpu_exit() on it when @exit is
 * true, and wakes everybody waiting on qemu_pause_cond.
 */
static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
    g_assert(qemu_cpu_is_self(cpu));
    cpu->stop = false;
    cpu->stopped = true;
    if (exit) {
        cpu_exit(cpu);
    }
    qemu_cond_broadcast(&qemu_pause_cond);
}
1206
/*
 * Work shared by all wait-for-event paths: clear the thread_kicked
 * flag, honour a pending stop request for @cpu and then run the work
 * items queued on it.
 */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        qemu_cpu_stop(cpu, false);
    }
    process_queued_cpu_work(cpu);
}
1215
1216 static void qemu_tcg_rr_wait_io_event(void)
1217 {
1218 CPUState *cpu;
1219
1220 while (all_cpu_threads_idle()) {
1221 stop_tcg_kick_timer();
1222 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1223 }
1224
1225 start_tcg_kick_timer();
1226
1227 CPU_FOREACH(cpu) {
1228 qemu_wait_io_event_common(cpu);
1229 }
1230 }
1231
1232 static void qemu_wait_io_event(CPUState *cpu)
1233 {
1234 bool slept = false;
1235
1236 while (cpu_thread_is_idle(cpu)) {
1237 if (!slept) {
1238 slept = true;
1239 qemu_plugin_vcpu_idle_cb(cpu);
1240 }
1241 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1242 }
1243 if (slept) {
1244 qemu_plugin_vcpu_resume_cb(cpu);
1245 }
1246
1247 #ifdef _WIN32
1248 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1249 if (!tcg_enabled()) {
1250 SleepEx(0, TRUE);
1251 }
1252 #endif
1253 qemu_wait_io_event_common(cpu);
1254 }
1255
1256 static void *qemu_kvm_cpu_thread_fn(void *arg)
1257 {
1258 CPUState *cpu = arg;
1259 int r;
1260
1261 rcu_register_thread();
1262
1263 qemu_mutex_lock_iothread();
1264 qemu_thread_get_self(cpu->thread);
1265 cpu->thread_id = qemu_get_thread_id();
1266 cpu->can_do_io = 1;
1267 current_cpu = cpu;
1268
1269 r = kvm_init_vcpu(cpu);
1270 if (r < 0) {
1271 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1272 exit(1);
1273 }
1274
1275 kvm_init_cpu_signals(cpu);
1276
1277 /* signal CPU creation */
1278 cpu->created = true;
1279 qemu_cond_signal(&qemu_cpu_cond);
1280 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1281
1282 do {
1283 if (cpu_can_run(cpu)) {
1284 r = kvm_cpu_exec(cpu);
1285 if (r == EXCP_DEBUG) {
1286 cpu_handle_guest_debug(cpu);
1287 }
1288 }
1289 qemu_wait_io_event(cpu);
1290 } while (!cpu->unplug || cpu_can_run(cpu));
1291
1292 qemu_kvm_destroy_vcpu(cpu);
1293 cpu->created = false;
1294 qemu_cond_signal(&qemu_cpu_cond);
1295 qemu_mutex_unlock_iothread();
1296 rcu_unregister_thread();
1297 return NULL;
1298 }
1299
1300 static void *qemu_dummy_cpu_thread_fn(void *arg)
1301 {
1302 #ifdef _WIN32
1303 error_report("qtest is not supported under Windows");
1304 exit(1);
1305 #else
1306 CPUState *cpu = arg;
1307 sigset_t waitset;
1308 int r;
1309
1310 rcu_register_thread();
1311
1312 qemu_mutex_lock_iothread();
1313 qemu_thread_get_self(cpu->thread);
1314 cpu->thread_id = qemu_get_thread_id();
1315 cpu->can_do_io = 1;
1316 current_cpu = cpu;
1317
1318 sigemptyset(&waitset);
1319 sigaddset(&waitset, SIG_IPI);
1320
1321 /* signal CPU creation */
1322 cpu->created = true;
1323 qemu_cond_signal(&qemu_cpu_cond);
1324 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1325
1326 do {
1327 qemu_mutex_unlock_iothread();
1328 do {
1329 int sig;
1330 r = sigwait(&waitset, &sig);
1331 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1332 if (r == -1) {
1333 perror("sigwait");
1334 exit(1);
1335 }
1336 qemu_mutex_lock_iothread();
1337 qemu_wait_io_event(cpu);
1338 } while (!cpu->unplug);
1339
1340 qemu_mutex_unlock_iothread();
1341 rcu_unregister_thread();
1342 return NULL;
1343 #endif
1344 }
1345
1346 static int64_t tcg_get_icount_limit(void)
1347 {
1348 int64_t deadline;
1349
1350 if (replay_mode != REPLAY_MODE_PLAY) {
1351 /*
1352 * Include all the timers, because they may need an attention.
1353 * Too long CPU execution may create unnecessary delay in UI.
1354 */
1355 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1356 QEMU_TIMER_ATTR_ALL);
1357 /* Check realtime timers, because they help with input processing */
1358 deadline = qemu_soonest_timeout(deadline,
1359 qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
1360 QEMU_TIMER_ATTR_ALL));
1361
1362 /* Maintain prior (possibly buggy) behaviour where if no deadline
1363 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1364 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1365 * nanoseconds.
1366 */
1367 if ((deadline < 0) || (deadline > INT32_MAX)) {
1368 deadline = INT32_MAX;
1369 }
1370
1371 return qemu_icount_round(deadline);
1372 } else {
1373 return replay_get_instructions();
1374 }
1375 }
1376
/*
 * Notify users of QEMU_CLOCK_VIRTUAL and then run the timers that are
 * due on that clock.
 */
static void notify_aio_contexts(void)
{
    /* Wake up other AioContexts. */
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
}
1383
1384 static void handle_icount_deadline(void)
1385 {
1386 assert(qemu_in_vcpu_thread());
1387 if (use_icount) {
1388 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1389 QEMU_TIMER_ATTR_ALL);
1390
1391 if (deadline == 0) {
1392 notify_aio_contexts();
1393 }
1394 }
1395 }
1396
1397 static void prepare_icount_for_run(CPUState *cpu)
1398 {
1399 if (use_icount) {
1400 int insns_left;
1401
1402 /* These should always be cleared by process_icount_data after
1403 * each vCPU execution. However u16.high can be raised
1404 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1405 */
1406 g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
1407 g_assert(cpu->icount_extra == 0);
1408
1409 cpu->icount_budget = tcg_get_icount_limit();
1410 insns_left = MIN(0xffff, cpu->icount_budget);
1411 cpu_neg(cpu)->icount_decr.u16.low = insns_left;
1412 cpu->icount_extra = cpu->icount_budget - insns_left;
1413
1414 replay_mutex_lock();
1415
1416 if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
1417 notify_aio_contexts();
1418 }
1419 }
1420 }
1421
1422 static void process_icount_data(CPUState *cpu)
1423 {
1424 if (use_icount) {
1425 /* Account for executed instructions */
1426 cpu_update_icount(cpu);
1427
1428 /* Reset the counters */
1429 cpu_neg(cpu)->icount_decr.u16.low = 0;
1430 cpu->icount_extra = 0;
1431 cpu->icount_budget = 0;
1432
1433 replay_account_executed_instructions();
1434
1435 replay_mutex_unlock();
1436 }
1437 }
1438
1439
1440 static int tcg_cpu_exec(CPUState *cpu)
1441 {
1442 int ret;
1443 #ifdef CONFIG_PROFILER
1444 int64_t ti;
1445 #endif
1446
1447 assert(tcg_enabled());
1448 #ifdef CONFIG_PROFILER
1449 ti = profile_getclock();
1450 #endif
1451 cpu_exec_start(cpu);
1452 ret = cpu_exec(cpu);
1453 cpu_exec_end(cpu);
1454 #ifdef CONFIG_PROFILER
1455 atomic_set(&tcg_ctx->prof.cpu_exec_time,
1456 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
1457 #endif
1458 return ret;
1459 }
1460
1461 /* Destroy any remaining vCPUs which have been unplugged and have
1462 * finished running
1463 */
1464 static void deal_with_unplugged_cpus(void)
1465 {
1466 CPUState *cpu;
1467
1468 CPU_FOREACH(cpu) {
1469 if (cpu->unplug && !cpu_can_run(cpu)) {
1470 qemu_tcg_destroy_vcpu(cpu);
1471 cpu->created = false;
1472 qemu_cond_signal(&qemu_cpu_cond);
1473 break;
1474 }
1475 }
1476 }
1477
1478 /* Single-threaded TCG
1479 *
1480 * In the single-threaded case each vCPU is simulated in turn. If
1481 * there is more than a single vCPU we create a simple timer to kick
1482 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1483 * This is done explicitly rather than relying on side-effects
1484 * elsewhere.
1485 */
1486
1487 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1488 {
1489 CPUState *cpu = arg;
1490
1491 assert(tcg_enabled());
1492 rcu_register_thread();
1493 tcg_register_thread();
1494
1495 qemu_mutex_lock_iothread();
1496 qemu_thread_get_self(cpu->thread);
1497
1498 cpu->thread_id = qemu_get_thread_id();
1499 cpu->created = true;
1500 cpu->can_do_io = 1;
1501 qemu_cond_signal(&qemu_cpu_cond);
1502 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1503
1504 /* wait for initial kick-off after machine start */
1505 while (first_cpu->stopped) {
1506 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1507
1508 /* process any pending work */
1509 CPU_FOREACH(cpu) {
1510 current_cpu = cpu;
1511 qemu_wait_io_event_common(cpu);
1512 }
1513 }
1514
1515 start_tcg_kick_timer();
1516
1517 cpu = first_cpu;
1518
1519 /* process any pending work */
1520 cpu->exit_request = 1;
1521
1522 while (1) {
1523 qemu_mutex_unlock_iothread();
1524 replay_mutex_lock();
1525 qemu_mutex_lock_iothread();
1526 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1527 qemu_account_warp_timer();
1528
1529 /* Run the timers here. This is much more efficient than
1530 * waking up the I/O thread and waiting for completion.
1531 */
1532 handle_icount_deadline();
1533
1534 replay_mutex_unlock();
1535
1536 if (!cpu) {
1537 cpu = first_cpu;
1538 }
1539
1540 while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
1541
1542 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1543 current_cpu = cpu;
1544
1545 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1546 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1547
1548 if (cpu_can_run(cpu)) {
1549 int r;
1550
1551 qemu_mutex_unlock_iothread();
1552 prepare_icount_for_run(cpu);
1553
1554 r = tcg_cpu_exec(cpu);
1555
1556 process_icount_data(cpu);
1557 qemu_mutex_lock_iothread();
1558
1559 if (r == EXCP_DEBUG) {
1560 cpu_handle_guest_debug(cpu);
1561 break;
1562 } else if (r == EXCP_ATOMIC) {
1563 qemu_mutex_unlock_iothread();
1564 cpu_exec_step_atomic(cpu);
1565 qemu_mutex_lock_iothread();
1566 break;
1567 }
1568 } else if (cpu->stop) {
1569 if (cpu->unplug) {
1570 cpu = CPU_NEXT(cpu);
1571 }
1572 break;
1573 }
1574
1575 cpu = CPU_NEXT(cpu);
1576 } /* while (cpu && !cpu->exit_request).. */
1577
1578 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1579 atomic_set(&tcg_current_rr_cpu, NULL);
1580
1581 if (cpu && cpu->exit_request) {
1582 atomic_mb_set(&cpu->exit_request, 0);
1583 }
1584
1585 if (use_icount && all_cpu_threads_idle()) {
1586 /*
1587 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1588 * in the main_loop, wake it up in order to start the warp timer.
1589 */
1590 qemu_notify_event();
1591 }
1592
1593 qemu_tcg_rr_wait_io_event();
1594 deal_with_unplugged_cpus();
1595 }
1596
1597 rcu_unregister_thread();
1598 return NULL;
1599 }
1600
1601 static void *qemu_hax_cpu_thread_fn(void *arg)
1602 {
1603 CPUState *cpu = arg;
1604 int r;
1605
1606 rcu_register_thread();
1607 qemu_mutex_lock_iothread();
1608 qemu_thread_get_self(cpu->thread);
1609
1610 cpu->thread_id = qemu_get_thread_id();
1611 cpu->created = true;
1612 current_cpu = cpu;
1613
1614 hax_init_vcpu(cpu);
1615 qemu_cond_signal(&qemu_cpu_cond);
1616 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1617
1618 do {
1619 if (cpu_can_run(cpu)) {
1620 r = hax_smp_cpu_exec(cpu);
1621 if (r == EXCP_DEBUG) {
1622 cpu_handle_guest_debug(cpu);
1623 }
1624 }
1625
1626 qemu_wait_io_event(cpu);
1627 } while (!cpu->unplug || cpu_can_run(cpu));
1628 rcu_unregister_thread();
1629 return NULL;
1630 }
1631
1632 /* The HVF-specific vCPU thread function. This one should only run when the host
1633 * CPU supports the VMX "unrestricted guest" feature. */
1634 static void *qemu_hvf_cpu_thread_fn(void *arg)
1635 {
1636 CPUState *cpu = arg;
1637
1638 int r;
1639
1640 assert(hvf_enabled());
1641
1642 rcu_register_thread();
1643
1644 qemu_mutex_lock_iothread();
1645 qemu_thread_get_self(cpu->thread);
1646
1647 cpu->thread_id = qemu_get_thread_id();
1648 cpu->can_do_io = 1;
1649 current_cpu = cpu;
1650
1651 hvf_init_vcpu(cpu);
1652
1653 /* signal CPU creation */
1654 cpu->created = true;
1655 qemu_cond_signal(&qemu_cpu_cond);
1656 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1657
1658 do {
1659 if (cpu_can_run(cpu)) {
1660 r = hvf_vcpu_exec(cpu);
1661 if (r == EXCP_DEBUG) {
1662 cpu_handle_guest_debug(cpu);
1663 }
1664 }
1665 qemu_wait_io_event(cpu);
1666 } while (!cpu->unplug || cpu_can_run(cpu));
1667
1668 hvf_vcpu_destroy(cpu);
1669 cpu->created = false;
1670 qemu_cond_signal(&qemu_cpu_cond);
1671 qemu_mutex_unlock_iothread();
1672 rcu_unregister_thread();
1673 return NULL;
1674 }
1675
1676 static void *qemu_whpx_cpu_thread_fn(void *arg)
1677 {
1678 CPUState *cpu = arg;
1679 int r;
1680
1681 rcu_register_thread();
1682
1683 qemu_mutex_lock_iothread();
1684 qemu_thread_get_self(cpu->thread);
1685 cpu->thread_id = qemu_get_thread_id();
1686 current_cpu = cpu;
1687
1688 r = whpx_init_vcpu(cpu);
1689 if (r < 0) {
1690 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1691 exit(1);
1692 }
1693
1694 /* signal CPU creation */
1695 cpu->created = true;
1696 qemu_cond_signal(&qemu_cpu_cond);
1697 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1698
1699 do {
1700 if (cpu_can_run(cpu)) {
1701 r = whpx_vcpu_exec(cpu);
1702 if (r == EXCP_DEBUG) {
1703 cpu_handle_guest_debug(cpu);
1704 }
1705 }
1706 while (cpu_thread_is_idle(cpu)) {
1707 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1708 }
1709 qemu_wait_io_event_common(cpu);
1710 } while (!cpu->unplug || cpu_can_run(cpu));
1711
1712 whpx_destroy_vcpu(cpu);
1713 cpu->created = false;
1714 qemu_cond_signal(&qemu_cpu_cond);
1715 qemu_mutex_unlock_iothread();
1716 rcu_unregister_thread();
1717 return NULL;
1718 }
1719
1720 #ifdef _WIN32
/*
 * Empty APC routine.  qemu_cpu_kick_thread() queues it to a vCPU thread
 * on Windows; the callback itself intentionally does nothing.
 */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
1724 #endif
1725
1726 /* Multi-threaded TCG
1727 *
1728 * In the multi-threaded case each vCPU has its own thread. The TLS
1729 * variable current_cpu can be used deep in the code to find the
1730 * current CPUState for a given thread.
1731 */
1732
1733 static void *qemu_tcg_cpu_thread_fn(void *arg)
1734 {
1735 CPUState *cpu = arg;
1736
1737 assert(tcg_enabled());
1738 g_assert(!use_icount);
1739
1740 rcu_register_thread();
1741 tcg_register_thread();
1742
1743 qemu_mutex_lock_iothread();
1744 qemu_thread_get_self(cpu->thread);
1745
1746 cpu->thread_id = qemu_get_thread_id();
1747 cpu->created = true;
1748 cpu->can_do_io = 1;
1749 current_cpu = cpu;
1750 qemu_cond_signal(&qemu_cpu_cond);
1751 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1752
1753 /* process any pending work */
1754 cpu->exit_request = 1;
1755
1756 do {
1757 if (cpu_can_run(cpu)) {
1758 int r;
1759 qemu_mutex_unlock_iothread();
1760 r = tcg_cpu_exec(cpu);
1761 qemu_mutex_lock_iothread();
1762 switch (r) {
1763 case EXCP_DEBUG:
1764 cpu_handle_guest_debug(cpu);
1765 break;
1766 case EXCP_HALTED:
1767 /* during start-up the vCPU is reset and the thread is
1768 * kicked several times. If we don't ensure we go back
1769 * to sleep in the halted state we won't cleanly
1770 * start-up when the vCPU is enabled.
1771 *
1772 * cpu->halted should ensure we sleep in wait_io_event
1773 */
1774 g_assert(cpu->halted);
1775 break;
1776 case EXCP_ATOMIC:
1777 qemu_mutex_unlock_iothread();
1778 cpu_exec_step_atomic(cpu);
1779 qemu_mutex_lock_iothread();
1780 default:
1781 /* Ignore everything else? */
1782 break;
1783 }
1784 }
1785
1786 atomic_mb_set(&cpu->exit_request, 0);
1787 qemu_wait_io_event(cpu);
1788 } while (!cpu->unplug || cpu_can_run(cpu));
1789
1790 qemu_tcg_destroy_vcpu(cpu);
1791 cpu->created = false;
1792 qemu_cond_signal(&qemu_cpu_cond);
1793 qemu_mutex_unlock_iothread();
1794 rcu_unregister_thread();
1795 return NULL;
1796 }
1797
1798 static void qemu_cpu_kick_thread(CPUState *cpu)
1799 {
1800 #ifndef _WIN32
1801 int err;
1802
1803 if (cpu->thread_kicked) {
1804 return;
1805 }
1806 cpu->thread_kicked = true;
1807 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1808 if (err && err != ESRCH) {
1809 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1810 exit(1);
1811 }
1812 #else /* _WIN32 */
1813 if (!qemu_cpu_is_self(cpu)) {
1814 if (whpx_enabled()) {
1815 whpx_vcpu_kick(cpu);
1816 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1817 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1818 __func__, GetLastError());
1819 exit(1);
1820 }
1821 }
1822 #endif
1823 }
1824
1825 void qemu_cpu_kick(CPUState *cpu)
1826 {
1827 qemu_cond_broadcast(cpu->halt_cond);
1828 if (tcg_enabled()) {
1829 if (qemu_tcg_mttcg_enabled()) {
1830 cpu_exit(cpu);
1831 } else {
1832 qemu_cpu_kick_rr_cpus();
1833 }
1834 } else {
1835 if (hax_enabled()) {
1836 /*
1837 * FIXME: race condition with the exit_request check in
1838 * hax_vcpu_hax_exec
1839 */
1840 cpu->exit_request = 1;
1841 }
1842 qemu_cpu_kick_thread(cpu);
1843 }
1844 }
1845
/*
 * Kick the host thread of the vCPU bound to the calling thread.
 * current_cpu must be set (asserted).
 */
void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}
1851
/* Return true if the calling thread is the thread recorded for @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1856
1857 bool qemu_in_vcpu_thread(void)
1858 {
1859 return current_cpu && qemu_cpu_is_self(current_cpu);
1860 }
1861
/* Per-thread flag: true while this thread holds qemu_global_mutex (the BQL) */
static __thread bool iothread_locked = false;
1863
/* Return the calling thread's iothread_locked flag. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1868
1869 /*
1870 * The BQL is taken from so many places that it is worth profiling the
1871 * callers directly, instead of funneling them all through a single function.
1872 */
1873 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1874 {
1875 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1876
1877 g_assert(!qemu_mutex_iothread_locked());
1878 bql_lock(&qemu_global_mutex, file, line);
1879 iothread_locked = true;
1880 }
1881
/*
 * Release the global mutex.  The calling thread must hold it
 * (asserted); the per-thread flag is cleared before the unlock.
 */
void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
1888
/* Wait on @cond using the global mutex as the associated mutex. */
void qemu_cond_wait_iothread(QemuCond *cond)
{
    qemu_cond_wait(cond, &qemu_global_mutex);
}
1893
1894 static bool all_vcpus_paused(void)
1895 {
1896 CPUState *cpu;
1897
1898 CPU_FOREACH(cpu) {
1899 if (!cpu->stopped) {
1900 return false;
1901 }
1902 }
1903
1904 return true;
1905 }
1906
1907 void pause_all_vcpus(void)
1908 {
1909 CPUState *cpu;
1910
1911 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1912 CPU_FOREACH(cpu) {
1913 if (qemu_cpu_is_self(cpu)) {
1914 qemu_cpu_stop(cpu, true);
1915 } else {
1916 cpu->stop = true;
1917 qemu_cpu_kick(cpu);
1918 }
1919 }
1920
1921 /* We need to drop the replay_lock so any vCPU threads woken up
1922 * can finish their replay tasks
1923 */
1924 replay_mutex_unlock();
1925
1926 while (!all_vcpus_paused()) {
1927 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1928 CPU_FOREACH(cpu) {
1929 qemu_cpu_kick(cpu);
1930 }
1931 }
1932
1933 qemu_mutex_unlock_iothread();
1934 replay_mutex_lock();
1935 qemu_mutex_lock_iothread();
1936 }
1937
/*
 * Let @cpu run again: clear both the stop request and the stopped
 * state, then kick the vCPU so it notices the change.
 */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1944
1945 void resume_all_vcpus(void)
1946 {
1947 CPUState *cpu;
1948
1949 if (!runstate_is_running()) {
1950 return;
1951 }
1952
1953 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1954 CPU_FOREACH(cpu) {
1955 cpu_resume(cpu);
1956 }
1957 }
1958
/*
 * Request removal of @cpu and wait for its thread to finish: flag the
 * vCPU as stopping and unplugged, kick it, then join its thread.  The
 * iothread lock is dropped around the join and re-taken afterwards.
 */
void cpu_remove_sync(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
    qemu_mutex_unlock_iothread();
    qemu_thread_join(cpu->thread);
    qemu_mutex_lock_iothread();
}
1968
/* Size, in bytes, of the temporary buffers used to form vCPU thread names */
#define VCPU_THREAD_NAME_SIZE 16
1971
1972 static void qemu_tcg_init_vcpu(CPUState *cpu)
1973 {
1974 char thread_name[VCPU_THREAD_NAME_SIZE];
1975 static QemuCond *single_tcg_halt_cond;
1976 static QemuThread *single_tcg_cpu_thread;
1977 static int tcg_region_inited;
1978
1979 assert(tcg_enabled());
1980 /*
1981 * Initialize TCG regions--once. Now is a good time, because:
1982 * (1) TCG's init context, prologue and target globals have been set up.
1983 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1984 * -accel flag is processed, so the check doesn't work then).
1985 */
1986 if (!tcg_region_inited) {
1987 tcg_region_inited = 1;
1988 tcg_region_init();
1989 }
1990
1991 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1992 cpu->thread = g_malloc0(sizeof(QemuThread));
1993 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1994 qemu_cond_init(cpu->halt_cond);
1995
1996 if (qemu_tcg_mttcg_enabled()) {
1997 /* create a thread per vCPU with TCG (MTTCG) */
1998 parallel_cpus = true;
1999 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
2000 cpu->cpu_index);
2001
2002 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
2003 cpu, QEMU_THREAD_JOINABLE);
2004
2005 } else {
2006 /* share a single thread for all cpus with TCG */
2007 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
2008 qemu_thread_create(cpu->thread, thread_name,
2009 qemu_tcg_rr_cpu_thread_fn,
2010 cpu, QEMU_THREAD_JOINABLE);
2011
2012 single_tcg_halt_cond = cpu->halt_cond;
2013 single_tcg_cpu_thread = cpu->thread;
2014 }
2015 #ifdef _WIN32
2016 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2017 #endif
2018 } else {
2019 /* For non-MTTCG cases we share the thread */
2020 cpu->thread = single_tcg_cpu_thread;
2021 cpu->halt_cond = single_tcg_halt_cond;
2022 cpu->thread_id = first_cpu->thread_id;
2023 cpu->can_do_io = 1;
2024 cpu->created = true;
2025 }
2026 }
2027
2028 static void qemu_hax_start_vcpu(CPUState *cpu)
2029 {
2030 char thread_name[VCPU_THREAD_NAME_SIZE];
2031
2032 cpu->thread = g_malloc0(sizeof(QemuThread));
2033 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2034 qemu_cond_init(cpu->halt_cond);
2035
2036 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
2037 cpu->cpu_index);
2038 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
2039 cpu, QEMU_THREAD_JOINABLE);
2040 #ifdef _WIN32
2041 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2042 #endif
2043 }
2044
2045 static void qemu_kvm_start_vcpu(CPUState *cpu)
2046 {
2047 char thread_name[VCPU_THREAD_NAME_SIZE];
2048
2049 cpu->thread = g_malloc0(sizeof(QemuThread));
2050 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2051 qemu_cond_init(cpu->halt_cond);
2052 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2053 cpu->cpu_index);
2054 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2055 cpu, QEMU_THREAD_JOINABLE);
2056 }
2057
2058 static void qemu_hvf_start_vcpu(CPUState *cpu)
2059 {
2060 char thread_name[VCPU_THREAD_NAME_SIZE];
2061
2062 /* HVF currently does not support TCG, and only runs in
2063 * unrestricted-guest mode. */
2064 assert(hvf_enabled());
2065
2066 cpu->thread = g_malloc0(sizeof(QemuThread));
2067 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2068 qemu_cond_init(cpu->halt_cond);
2069
2070 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2071 cpu->cpu_index);
2072 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2073 cpu, QEMU_THREAD_JOINABLE);
2074 }
2075
2076 static void qemu_whpx_start_vcpu(CPUState *cpu)
2077 {
2078 char thread_name[VCPU_THREAD_NAME_SIZE];
2079
2080 cpu->thread = g_malloc0(sizeof(QemuThread));
2081 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2082 qemu_cond_init(cpu->halt_cond);
2083 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2084 cpu->cpu_index);
2085 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2086 cpu, QEMU_THREAD_JOINABLE);
2087 #ifdef _WIN32
2088 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2089 #endif
2090 }
2091
2092 static void qemu_dummy_start_vcpu(CPUState *cpu)
2093 {
2094 char thread_name[VCPU_THREAD_NAME_SIZE];
2095
2096 cpu->thread = g_malloc0(sizeof(QemuThread));
2097 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2098 qemu_cond_init(cpu->halt_cond);
2099 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2100 cpu->cpu_index);
2101 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2102 QEMU_THREAD_JOINABLE);
2103 }
2104
2105 void qemu_init_vcpu(CPUState *cpu)
2106 {
2107 MachineState *ms = MACHINE(qdev_get_machine());
2108
2109 cpu->nr_cores = ms->smp.cores;
2110 cpu->nr_threads = ms->smp.threads;
2111 cpu->stopped = true;
2112 cpu->random_seed = qemu_guest_random_seed_thread_part1();
2113
2114 if (!cpu->as) {
2115 /* If the target cpu hasn't set up any address spaces itself,
2116 * give it the default one.
2117 */
2118 cpu->num_ases = 1;
2119 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2120 }
2121
2122 if (kvm_enabled()) {
2123 qemu_kvm_start_vcpu(cpu);
2124 } else if (hax_enabled()) {
2125 qemu_hax_start_vcpu(cpu);
2126 } else if (hvf_enabled()) {
2127 qemu_hvf_start_vcpu(cpu);
2128 } else if (tcg_enabled()) {
2129 qemu_tcg_init_vcpu(cpu);
2130 } else if (whpx_enabled()) {
2131 qemu_whpx_start_vcpu(cpu);
2132 } else {
2133 qemu_dummy_start_vcpu(cpu);
2134 }
2135
2136 while (!cpu->created) {
2137 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2138 }
2139 }
2140
2141 void cpu_stop_current(void)
2142 {
2143 if (current_cpu) {
2144 current_cpu->stop = true;
2145 cpu_exit(current_cpu);
2146 }
2147 }
2148
2149 int vm_stop(RunState state)
2150 {
2151 if (qemu_in_vcpu_thread()) {
2152 qemu_system_vmstop_request_prepare();
2153 qemu_system_vmstop_request(state);
2154 /*
2155 * FIXME: should not return to device code in case
2156 * vm_stop() has been requested.
2157 */
2158 cpu_stop_current();
2159 return 0;
2160 }
2161
2162 return do_vm_stop(state, true);
2163 }
2164
2165 /**
2166 * Prepare for (re)starting the VM.
2167 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2168 * running or in case of an error condition), 0 otherwise.
2169 */
2170 int vm_prepare_start(void)
2171 {
2172 RunState requested;
2173
2174 qemu_vmstop_requested(&requested);
2175 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2176 return -1;
2177 }
2178
2179 /* Ensure that a STOP/RESUME pair of events is emitted if a
2180 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2181 * example, according to documentation is always followed by
2182 * the STOP event.
2183 */
2184 if (runstate_is_running()) {
2185 qapi_event_send_stop();
2186 qapi_event_send_resume();
2187 return -1;
2188 }
2189
2190 /* We are sending this now, but the CPUs will be resumed shortly later */
2191 qapi_event_send_resume();
2192
2193 cpu_enable_ticks();
2194 runstate_set(RUN_STATE_RUNNING);
2195 vm_state_notify(1, RUN_STATE_RUNNING);
2196 return 0;
2197 }
2198
/*
 * Start (or restart) the VM: resume all vCPUs when vm_prepare_start()
 * returns 0.
 */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
2205
2206 /* does a state transition even if the VM is already stopped,
2207 current state is forgotten forever */
2208 int vm_stop_force_state(RunState state)
2209 {
2210 if (runstate_is_running()) {
2211 return vm_stop(state);
2212 } else {
2213 runstate_set(state);
2214
2215 bdrv_drain_all();
2216 /* Make sure to return an error if the flush in a previous vm_stop()
2217 * failed. */
2218 return bdrv_flush_all();
2219 }
2220 }
2221
/*
 * Print the CPU models known to the target by invoking the target's
 * cpu_list macro, when the target defines one.  @optarg is unused.
 */
void list_cpus(const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list();
#endif
}
2229
2230 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2231 bool has_cpu, int64_t cpu_index, Error **errp)
2232 {
2233 FILE *f;
2234 uint32_t l;
2235 CPUState *cpu;
2236 uint8_t buf[1024];
2237 int64_t orig_addr = addr, orig_size = size;
2238
2239 if (!has_cpu) {
2240 cpu_index = 0;
2241 }
2242
2243 cpu = qemu_get_cpu(cpu_index);
2244 if (cpu == NULL) {
2245 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2246 "a CPU number");
2247 return;
2248 }
2249
2250 f = fopen(filename, "wb");
2251 if (!f) {
2252 error_setg_file_open(errp, errno, filename);
2253 return;
2254 }
2255
2256 while (size != 0) {
2257 l = sizeof(buf);
2258 if (l > size)
2259 l = size;
2260 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2261 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2262 " specified", orig_addr, orig_size);
2263 goto exit;
2264 }
2265 if (fwrite(buf, 1, l, f) != l) {
2266 error_setg(errp, QERR_IO_ERROR);
2267 goto exit;
2268 }
2269 addr += l;
2270 size -= l;
2271 }
2272
2273 exit:
2274 fclose(f);
2275 }
2276
2277 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2278 Error **errp)
2279 {
2280 FILE *f;
2281 uint32_t l;
2282 uint8_t buf[1024];
2283
2284 f = fopen(filename, "wb");
2285 if (!f) {
2286 error_setg_file_open(errp, errno, filename);
2287 return;
2288 }
2289
2290 while (size != 0) {
2291 l = sizeof(buf);
2292 if (l > size)
2293 l = size;
2294 cpu_physical_memory_read(addr, buf, l);
2295 if (fwrite(buf, 1, l, f) != l) {
2296 error_setg(errp, QERR_IO_ERROR);
2297 goto exit;
2298 }
2299 addr += l;
2300 size -= l;
2301 }
2302
2303 exit:
2304 fclose(f);
2305 }
2306
/*
 * QMP 'inject-nmi': hand the request to nmi_monitor_handle() for the
 * CPU index reported by monitor_get_cpu_index(); errors go to @errp.
 */
void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}
2311
2312 void dump_drift_info(void)
2313 {
2314 if (!use_icount) {
2315 return;
2316 }
2317
2318 qemu_printf("Host - Guest clock %"PRIi64" ms\n",
2319 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2320 if (icount_align_option) {
2321 qemu_printf("Max guest delay %"PRIi64" ms\n",
2322 -max_delay / SCALE_MS);
2323 qemu_printf("Max guest advance %"PRIi64" ms\n",
2324 max_advance / SCALE_MS);
2325 } else {
2326 qemu_printf("Max guest delay NA\n");
2327 qemu_printf("Max guest advance NA\n");
2328 }
2329 }