9pfs: move pdus to V9fsState
[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "monitor/monitor.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qemu/error-report.h"
32 #include "sysemu/sysemu.h"
33 #include "sysemu/block-backend.h"
34 #include "exec/gdbstub.h"
35 #include "sysemu/dma.h"
36 #include "sysemu/kvm.h"
37 #include "qmp-commands.h"
38 #include "exec/exec-all.h"
39
40 #include "qemu/thread.h"
41 #include "sysemu/cpus.h"
42 #include "sysemu/qtest.h"
43 #include "qemu/main-loop.h"
44 #include "qemu/bitmap.h"
45 #include "qemu/seqlock.h"
46 #include "qapi-event.h"
47 #include "hw/nmi.h"
48 #include "sysemu/replay.h"
49
50 #ifndef _WIN32
51 #include "qemu/compatfd.h"
52 #endif
53
54 #ifdef CONFIG_LINUX
55
56 #include <sys/prctl.h>
57
58 #ifndef PR_MCE_KILL
59 #define PR_MCE_KILL 33
60 #endif
61
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
64 #endif
65
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
68 #endif
69
70 #endif /* CONFIG_LINUX */
71
/* Not referenced in the visible part of this file; presumably consumed by
 * other translation units -- TODO confirm.
 */
72 int64_t max_delay;
73 int64_t max_advance;
74
75 /* vcpu throttling controls */
/* Timer created in cpu_ticks_init() whose callback is cpu_throttle_timer_tick(). */
76 static QEMUTimer *throttle_timer;
/* Current throttle percentage; 0 means throttling is inactive
 * (see cpu_throttle_stop() and cpu_throttle_active()).
 */
77 static unsigned int throttle_percentage;
78
/* cpu_throttle_set() clamps the requested percentage to [MIN, MAX]. */
79 #define CPU_THROTTLE_PCT_MIN 1
80 #define CPU_THROTTLE_PCT_MAX 99
/* Base timeslice in nanoseconds (10 ms) used to derive sleep and re-arm times. */
81 #define CPU_THROTTLE_TIMESLICE_NS 10000000
82
83 bool cpu_is_stopped(CPUState *cpu)
84 {
85 return cpu->stopped || !runstate_is_running();
86 }
87
88 static bool cpu_thread_is_idle(CPUState *cpu)
89 {
90 if (cpu->stop || cpu->queued_work_first) {
91 return false;
92 }
93 if (cpu_is_stopped(cpu)) {
94 return true;
95 }
96 if (!cpu->halted || cpu_has_work(cpu) ||
97 kvm_halt_in_kernel()) {
98 return false;
99 }
100 return true;
101 }
102
103 static bool all_cpu_threads_idle(void)
104 {
105 CPUState *cpu;
106
107 CPU_FOREACH(cpu) {
108 if (!cpu_thread_is_idle(cpu)) {
109 return false;
110 }
111 }
112 return true;
113 }
114
115 /***********************************************************/
116 /* guest cycle counter */
117
118 /* Protected by TimersState seqlock */
119
/* Set from the "sleep" option in configure_icount(); defaults to true. */
120 static bool icount_sleep = true;
/* QEMU_CLOCK_VIRTUAL_RT value at which a pending warp started, or -1 when no
 * warp is pending (see qemu_start_warp_timer() and icount_warp_rt()).
 */
121 static int64_t vm_clock_warp_start = -1;
122 /* Conversion factor from emulated instructions to virtual clock ticks. */
123 static int icount_time_shift;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125 #define MAX_ICOUNT_SHIFT 10
126
/* Timers created in configure_icount(): the first two drive icount_adjust()
 * from QEMU_CLOCK_VIRTUAL_RT and QEMU_CLOCK_VIRTUAL respectively; the third
 * runs icount_timer_cb() and only exists when icount_sleep is true.
 */
127 static QEMUTimer *icount_rt_timer;
128 static QEMUTimer *icount_vm_timer;
129 static QEMUTimer *icount_warp_timer;
130
/* State backing cpu_get_ticks(), cpu_get_clock() and the instruction counter.
 * A single instance, timers_state, is registered for migration in
 * cpu_ticks_init().
 */
131 typedef struct TimersState {
132 /* Protected by BQL. */
133 int64_t cpu_ticks_prev;
134 int64_t cpu_ticks_offset;
135
136 /* cpu_clock_offset can be read out of BQL, so protect it with
137 * this lock.
138 */
139 QemuSeqLock vm_clock_seqlock;
140 int64_t cpu_clock_offset;
/* Non-zero while ticks are enabled; toggled by cpu_enable_ticks() and
 * cpu_disable_ticks().
 */
141 int32_t cpu_ticks_enabled;
/* Only appears in the vmstate_timers field list in the visible code;
 * presumably kept for migration stream compatibility -- TODO confirm.
 */
142 int64_t dummy;
143
144 /* Compensate for varying guest execution speed. */
145 int64_t qemu_icount_bias;
146 /* Only written by TCG thread */
147 int64_t qemu_icount;
148 } TimersState;
149
150 static TimersState timers_state;
151
152 int64_t cpu_get_icount_raw(void)
153 {
154 int64_t icount;
155 CPUState *cpu = current_cpu;
156
157 icount = timers_state.qemu_icount;
158 if (cpu) {
159 if (!cpu->can_do_io) {
160 fprintf(stderr, "Bad icount read\n");
161 exit(1);
162 }
163 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
164 }
165 return icount;
166 }
167
168 /* Return the virtual CPU time, based on the instruction counter. */
169 static int64_t cpu_get_icount_locked(void)
170 {
171 int64_t icount = cpu_get_icount_raw();
172 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
173 }
174
175 int64_t cpu_get_icount(void)
176 {
177 int64_t icount;
178 unsigned start;
179
180 do {
181 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
182 icount = cpu_get_icount_locked();
183 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
184
185 return icount;
186 }
187
188 int64_t cpu_icount_to_ns(int64_t icount)
189 {
190 return icount << icount_time_shift;
191 }
192
193 /* return the time elapsed in VM between vm_start and vm_stop. Unless
194 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
195 * counter.
196 *
197 * Caller must hold the BQL
198 */
199 int64_t cpu_get_ticks(void)
200 {
201 int64_t ticks;
202
203 if (use_icount) {
204 return cpu_get_icount();
205 }
206
207 ticks = timers_state.cpu_ticks_offset;
208 if (timers_state.cpu_ticks_enabled) {
209 ticks += cpu_get_host_ticks();
210 }
211
212 if (timers_state.cpu_ticks_prev > ticks) {
213 /* Note: non increasing ticks may happen if the host uses
214 software suspend */
215 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
216 ticks = timers_state.cpu_ticks_prev;
217 }
218
219 timers_state.cpu_ticks_prev = ticks;
220 return ticks;
221 }
222
223 static int64_t cpu_get_clock_locked(void)
224 {
225 int64_t time;
226
227 time = timers_state.cpu_clock_offset;
228 if (timers_state.cpu_ticks_enabled) {
229 time += get_clock();
230 }
231
232 return time;
233 }
234
235 /* Return the monotonic time elapsed in VM, i.e.,
236 * the time between vm_start and vm_stop
237 */
238 int64_t cpu_get_clock(void)
239 {
240 int64_t ti;
241 unsigned start;
242
243 do {
244 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
245 ti = cpu_get_clock_locked();
246 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
247
248 return ti;
249 }
250
251 /* enable cpu_get_ticks()
252 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
253 */
254 void cpu_enable_ticks(void)
255 {
256 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
257 seqlock_write_begin(&timers_state.vm_clock_seqlock);
258 if (!timers_state.cpu_ticks_enabled) {
259 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
260 timers_state.cpu_clock_offset -= get_clock();
261 timers_state.cpu_ticks_enabled = 1;
262 }
263 seqlock_write_end(&timers_state.vm_clock_seqlock);
264 }
265
266 /* disable cpu_get_ticks() : the clock is stopped. You must not call
267 * cpu_get_ticks() after that.
268 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
269 */
270 void cpu_disable_ticks(void)
271 {
272 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
273 seqlock_write_begin(&timers_state.vm_clock_seqlock);
274 if (timers_state.cpu_ticks_enabled) {
275 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
276 timers_state.cpu_clock_offset = cpu_get_clock_locked();
277 timers_state.cpu_ticks_enabled = 0;
278 }
279 seqlock_write_end(&timers_state.vm_clock_seqlock);
280 }
281
282 /* Correlation between real and virtual time is always going to be
283 fairly approximate, so ignore small variation.
284 When the guest is idle real and virtual time will be aligned in
285 the IO wait loop. */
286 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
287
288 static void icount_adjust(void)
289 {
290 int64_t cur_time;
291 int64_t cur_icount;
292 int64_t delta;
293
294 /* Protected by TimersState mutex. */
295 static int64_t last_delta;
296
297 /* If the VM is not running, then do nothing. */
298 if (!runstate_is_running()) {
299 return;
300 }
301
302 seqlock_write_begin(&timers_state.vm_clock_seqlock);
303 cur_time = cpu_get_clock_locked();
304 cur_icount = cpu_get_icount_locked();
305
306 delta = cur_icount - cur_time;
307 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
308 if (delta > 0
309 && last_delta + ICOUNT_WOBBLE < delta * 2
310 && icount_time_shift > 0) {
311 /* The guest is getting too far ahead. Slow time down. */
312 icount_time_shift--;
313 }
314 if (delta < 0
315 && last_delta - ICOUNT_WOBBLE > delta * 2
316 && icount_time_shift < MAX_ICOUNT_SHIFT) {
317 /* The guest is getting too far behind. Speed time up. */
318 icount_time_shift++;
319 }
320 last_delta = delta;
321 timers_state.qemu_icount_bias = cur_icount
322 - (timers_state.qemu_icount << icount_time_shift);
323 seqlock_write_end(&timers_state.vm_clock_seqlock);
324 }
325
326 static void icount_adjust_rt(void *opaque)
327 {
328 timer_mod(icount_rt_timer,
329 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
330 icount_adjust();
331 }
332
333 static void icount_adjust_vm(void *opaque)
334 {
335 timer_mod(icount_vm_timer,
336 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
337 NANOSECONDS_PER_SECOND / 10);
338 icount_adjust();
339 }
340
341 static int64_t qemu_icount_round(int64_t count)
342 {
343 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
344 }
345
346 static void icount_warp_rt(void)
347 {
348 unsigned seq;
349 int64_t warp_start;
350
351 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
352 * changes from -1 to another value, so the race here is okay.
353 */
354 do {
355 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
356 warp_start = vm_clock_warp_start;
357 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
358
359 if (warp_start == -1) {
360 return;
361 }
362
363 seqlock_write_begin(&timers_state.vm_clock_seqlock);
364 if (runstate_is_running()) {
365 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
366 cpu_get_clock_locked());
367 int64_t warp_delta;
368
369 warp_delta = clock - vm_clock_warp_start;
370 if (use_icount == 2) {
371 /*
372 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
373 * far ahead of real time.
374 */
375 int64_t cur_icount = cpu_get_icount_locked();
376 int64_t delta = clock - cur_icount;
377 warp_delta = MIN(warp_delta, delta);
378 }
379 timers_state.qemu_icount_bias += warp_delta;
380 }
381 vm_clock_warp_start = -1;
382 seqlock_write_end(&timers_state.vm_clock_seqlock);
383
384 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
385 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
386 }
387 }
388
/* Callback of icount_warp_timer; @opaque is unused. */
static void icount_timer_cb(void *opaque)
{
    /* The timer already synchronizes with CHECKPOINT_CLOCK_VIRTUAL_RT,
     * so no extra checkpoint is needed before accounting the warp.
     */
    icount_warp_rt();
}
396
397 void qtest_clock_warp(int64_t dest)
398 {
399 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
400 AioContext *aio_context;
401 assert(qtest_enabled());
402 aio_context = qemu_get_aio_context();
403 while (clock < dest) {
404 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
405 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
406
407 seqlock_write_begin(&timers_state.vm_clock_seqlock);
408 timers_state.qemu_icount_bias += warp;
409 seqlock_write_end(&timers_state.vm_clock_seqlock);
410
411 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
412 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
413 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
414 }
415 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
416 }
417
418 void qemu_start_warp_timer(void)
419 {
420 int64_t clock;
421 int64_t deadline;
422
423 if (!use_icount) {
424 return;
425 }
426
427 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
428 * do not fire, so computing the deadline does not make sense.
429 */
430 if (!runstate_is_running()) {
431 return;
432 }
433
434 /* warp clock deterministically in record/replay mode */
435 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
436 return;
437 }
438
439 if (!all_cpu_threads_idle()) {
440 return;
441 }
442
443 if (qtest_enabled()) {
444 /* When testing, qtest commands advance icount. */
445 return;
446 }
447
448 /* We want to use the earliest deadline from ALL vm_clocks */
449 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
450 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
451 if (deadline < 0) {
452 static bool notified;
453 if (!icount_sleep && !notified) {
454 error_report("WARNING: icount sleep disabled and no active timers");
455 notified = true;
456 }
457 return;
458 }
459
460 if (deadline > 0) {
461 /*
462 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
463 * sleep. Otherwise, the CPU might be waiting for a future timer
464 * interrupt to wake it up, but the interrupt never comes because
465 * the vCPU isn't running any insns and thus doesn't advance the
466 * QEMU_CLOCK_VIRTUAL.
467 */
468 if (!icount_sleep) {
469 /*
470 * We never let VCPUs sleep in no sleep icount mode.
471 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
472 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
473 * It is useful when we want a deterministic execution time,
474 * isolated from host latencies.
475 */
476 seqlock_write_begin(&timers_state.vm_clock_seqlock);
477 timers_state.qemu_icount_bias += deadline;
478 seqlock_write_end(&timers_state.vm_clock_seqlock);
479 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
480 } else {
481 /*
482 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
483 * "real" time, (related to the time left until the next event) has
484 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
485 * This avoids that the warps are visible externally; for example,
486 * you will not be sending network packets continuously instead of
487 * every 100ms.
488 */
489 seqlock_write_begin(&timers_state.vm_clock_seqlock);
490 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
491 vm_clock_warp_start = clock;
492 }
493 seqlock_write_end(&timers_state.vm_clock_seqlock);
494 timer_mod_anticipate(icount_warp_timer, clock + deadline);
495 }
496 } else if (deadline == 0) {
497 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
498 }
499 }
500
501 static void qemu_account_warp_timer(void)
502 {
503 if (!use_icount || !icount_sleep) {
504 return;
505 }
506
507 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
508 * do not fire, so computing the deadline does not make sense.
509 */
510 if (!runstate_is_running()) {
511 return;
512 }
513
514 /* warp clock deterministically in record/replay mode */
515 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
516 return;
517 }
518
519 timer_del(icount_warp_timer);
520 icount_warp_rt();
521 }
522
523 static bool icount_state_needed(void *opaque)
524 {
525 return use_icount;
526 }
527
528 /*
529 * This is a subsection for icount migration.
530 */
/* Carries the icount bias and counter from TimersState; only used when
 * icount_state_needed() returns true.  Referenced from vmstate_timers.
 */
531 static const VMStateDescription icount_vmstate_timers = {
532 .name = "timer/icount",
533 .version_id = 1,
534 .minimum_version_id = 1,
535 .needed = icount_state_needed,
536 .fields = (VMStateField[]) {
537 VMSTATE_INT64(qemu_icount_bias, TimersState),
538 VMSTATE_INT64(qemu_icount, TimersState),
539 VMSTATE_END_OF_LIST()
540 }
541 };
542
/* Migration description for TimersState, registered in cpu_ticks_init().
 * cpu_clock_offset is declared with version 2 via VMSTATE_INT64_V.
 */
543 static const VMStateDescription vmstate_timers = {
544 .name = "timer",
545 .version_id = 2,
546 .minimum_version_id = 1,
547 .fields = (VMStateField[]) {
548 VMSTATE_INT64(cpu_ticks_offset, TimersState),
549 VMSTATE_INT64(dummy, TimersState),
550 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
551 VMSTATE_END_OF_LIST()
552 },
553 .subsections = (const VMStateDescription*[]) {
554 &icount_vmstate_timers,
555 NULL
556 }
557 };
558
559 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
560 {
561 double pct;
562 double throttle_ratio;
563 long sleeptime_ns;
564
565 if (!cpu_throttle_get_percentage()) {
566 return;
567 }
568
569 pct = (double)cpu_throttle_get_percentage()/100;
570 throttle_ratio = pct / (1 - pct);
571 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
572
573 qemu_mutex_unlock_iothread();
574 atomic_set(&cpu->throttle_thread_scheduled, 0);
575 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
576 qemu_mutex_lock_iothread();
577 }
578
579 static void cpu_throttle_timer_tick(void *opaque)
580 {
581 CPUState *cpu;
582 double pct;
583
584 /* Stop the timer if needed */
585 if (!cpu_throttle_get_percentage()) {
586 return;
587 }
588 CPU_FOREACH(cpu) {
589 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
590 async_run_on_cpu(cpu, cpu_throttle_thread,
591 RUN_ON_CPU_NULL);
592 }
593 }
594
595 pct = (double)cpu_throttle_get_percentage()/100;
596 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
597 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
598 }
599
600 void cpu_throttle_set(int new_throttle_pct)
601 {
602 /* Ensure throttle percentage is within valid range */
603 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
604 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
605
606 atomic_set(&throttle_percentage, new_throttle_pct);
607
608 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
609 CPU_THROTTLE_TIMESLICE_NS);
610 }
611
612 void cpu_throttle_stop(void)
613 {
614 atomic_set(&throttle_percentage, 0);
615 }
616
/* Throttling is active whenever the current percentage is non-zero. */
bool cpu_throttle_active(void)
{
    return cpu_throttle_get_percentage() ? true : false;
}
621
622 int cpu_throttle_get_percentage(void)
623 {
624 return atomic_read(&throttle_percentage);
625 }
626
627 void cpu_ticks_init(void)
628 {
629 seqlock_init(&timers_state.vm_clock_seqlock);
630 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
631 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
632 cpu_throttle_timer_tick, NULL);
633 }
634
635 void configure_icount(QemuOpts *opts, Error **errp)
636 {
637 const char *option;
638 char *rem_str = NULL;
639
640 option = qemu_opt_get(opts, "shift");
641 if (!option) {
642 if (qemu_opt_get(opts, "align") != NULL) {
643 error_setg(errp, "Please specify shift option when using align");
644 }
645 return;
646 }
647
648 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
649 if (icount_sleep) {
650 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
651 icount_timer_cb, NULL);
652 }
653
654 icount_align_option = qemu_opt_get_bool(opts, "align", false);
655
656 if (icount_align_option && !icount_sleep) {
657 error_setg(errp, "align=on and sleep=off are incompatible");
658 }
659 if (strcmp(option, "auto") != 0) {
660 errno = 0;
661 icount_time_shift = strtol(option, &rem_str, 0);
662 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
663 error_setg(errp, "icount: Invalid shift value");
664 }
665 use_icount = 1;
666 return;
667 } else if (icount_align_option) {
668 error_setg(errp, "shift=auto and align=on are incompatible");
669 } else if (!icount_sleep) {
670 error_setg(errp, "shift=auto and sleep=off are incompatible");
671 }
672
673 use_icount = 2;
674
675 /* 125MIPS seems a reasonable initial guess at the guest speed.
676 It will be corrected fairly quickly anyway. */
677 icount_time_shift = 3;
678
679 /* Have both realtime and virtual time triggers for speed adjustment.
680 The realtime trigger catches emulated time passing too slowly,
681 the virtual time trigger catches emulated time passing too fast.
682 Realtime triggers occur even when idle, so use them less frequently
683 than VM triggers. */
684 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
685 icount_adjust_rt, NULL);
686 timer_mod(icount_rt_timer,
687 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
688 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
689 icount_adjust_vm, NULL);
690 timer_mod(icount_vm_timer,
691 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
692 NANOSECONDS_PER_SECOND / 10);
693 }
694
695 /***********************************************************/
696 void hw_error(const char *fmt, ...)
697 {
698 va_list ap;
699 CPUState *cpu;
700
701 va_start(ap, fmt);
702 fprintf(stderr, "qemu: hardware error: ");
703 vfprintf(stderr, fmt, ap);
704 fprintf(stderr, "\n");
705 CPU_FOREACH(cpu) {
706 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
707 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
708 }
709 va_end(ap);
710 abort();
711 }
712
713 void cpu_synchronize_all_states(void)
714 {
715 CPUState *cpu;
716
717 CPU_FOREACH(cpu) {
718 cpu_synchronize_state(cpu);
719 }
720 }
721
722 void cpu_synchronize_all_post_reset(void)
723 {
724 CPUState *cpu;
725
726 CPU_FOREACH(cpu) {
727 cpu_synchronize_post_reset(cpu);
728 }
729 }
730
731 void cpu_synchronize_all_post_init(void)
732 {
733 CPUState *cpu;
734
735 CPU_FOREACH(cpu) {
736 cpu_synchronize_post_init(cpu);
737 }
738 }
739
740 static int do_vm_stop(RunState state)
741 {
742 int ret = 0;
743
744 if (runstate_is_running()) {
745 cpu_disable_ticks();
746 pause_all_vcpus();
747 runstate_set(state);
748 vm_state_notify(0, state);
749 qapi_event_send_stop(&error_abort);
750 }
751
752 bdrv_drain_all();
753 replay_disable_events();
754 ret = bdrv_flush_all();
755
756 return ret;
757 }
758
759 static bool cpu_can_run(CPUState *cpu)
760 {
761 if (cpu->stop) {
762 return false;
763 }
764 if (cpu_is_stopped(cpu)) {
765 return false;
766 }
767 return true;
768 }
769
770 static void cpu_handle_guest_debug(CPUState *cpu)
771 {
772 gdb_set_stop_cpu(cpu);
773 qemu_system_debug_request();
774 cpu->stopped = true;
775 }
776
777 #ifdef CONFIG_LINUX
778 static void sigbus_reraise(void)
779 {
780 sigset_t set;
781 struct sigaction action;
782
783 memset(&action, 0, sizeof(action));
784 action.sa_handler = SIG_DFL;
785 if (!sigaction(SIGBUS, &action, NULL)) {
786 raise(SIGBUS);
787 sigemptyset(&set);
788 sigaddset(&set, SIGBUS);
789 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
790 }
791 perror("Failed to re-raise SIGBUS!\n");
792 abort();
793 }
794
795 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
796 void *ctx)
797 {
798 if (kvm_on_sigbus(siginfo->ssi_code,
799 (void *)(intptr_t)siginfo->ssi_addr)) {
800 sigbus_reraise();
801 }
802 }
803
804 static void qemu_init_sigbus(void)
805 {
806 struct sigaction action;
807
808 memset(&action, 0, sizeof(action));
809 action.sa_flags = SA_SIGINFO;
810 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
811 sigaction(SIGBUS, &action, NULL);
812
813 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
814 }
815
816 static void qemu_kvm_eat_signals(CPUState *cpu)
817 {
818 struct timespec ts = { 0, 0 };
819 siginfo_t siginfo;
820 sigset_t waitset;
821 sigset_t chkset;
822 int r;
823
824 sigemptyset(&waitset);
825 sigaddset(&waitset, SIG_IPI);
826 sigaddset(&waitset, SIGBUS);
827
828 do {
829 r = sigtimedwait(&waitset, &siginfo, &ts);
830 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
831 perror("sigtimedwait");
832 exit(1);
833 }
834
835 switch (r) {
836 case SIGBUS:
837 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
838 sigbus_reraise();
839 }
840 break;
841 default:
842 break;
843 }
844
845 r = sigpending(&chkset);
846 if (r == -1) {
847 perror("sigpending");
848 exit(1);
849 }
850 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
851 }
852
853 #else /* !CONFIG_LINUX */
854
/* Variant built when CONFIG_LINUX is not defined: intentionally empty. */
855 static void qemu_init_sigbus(void)
856 {
857 }
858
/* Variant built when CONFIG_LINUX is not defined: intentionally empty. */
859 static void qemu_kvm_eat_signals(CPUState *cpu)
860 {
861 }
862 #endif /* !CONFIG_LINUX */
863
864 #ifndef _WIN32
/* Empty signal handler; installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals().
 */
865 static void dummy_signal(int sig)
866 {
867 }
868
869 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
870 {
871 int r;
872 sigset_t set;
873 struct sigaction sigact;
874
875 memset(&sigact, 0, sizeof(sigact));
876 sigact.sa_handler = dummy_signal;
877 sigaction(SIG_IPI, &sigact, NULL);
878
879 pthread_sigmask(SIG_BLOCK, NULL, &set);
880 sigdelset(&set, SIG_IPI);
881 sigdelset(&set, SIGBUS);
882 r = kvm_set_signal_mask(cpu, &set);
883 if (r) {
884 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
885 exit(1);
886 }
887 }
888
889 #else /* _WIN32 */
/* _WIN32 variant: aborts unconditionally if it is ever called. */
890 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
891 {
892 abort();
893 }
894 #endif /* _WIN32 */
895
/* Mutex passed to qemu_cond_wait() by the vCPU wait loops in this file. */
896 static QemuMutex qemu_global_mutex;
/* Waited on in qemu_tcg_wait_io_event() while iothread_requesting_mutex is
 * non-zero.
 */
897 static QemuCond qemu_io_proceeded_cond;
/* Incremented atomically at the start of qemu_mutex_lock_iothread(). */
898 static unsigned iothread_requesting_mutex;
899
/* Set to the calling thread in qemu_init_cpu_loop(). */
900 static QemuThread io_thread;
901
902 /* cpu creation */
903 static QemuCond qemu_cpu_cond;
904 /* system init */
905 static QemuCond qemu_pause_cond;
906
907 void qemu_init_cpu_loop(void)
908 {
909 qemu_init_sigbus();
910 qemu_cond_init(&qemu_cpu_cond);
911 qemu_cond_init(&qemu_pause_cond);
912 qemu_cond_init(&qemu_io_proceeded_cond);
913 qemu_mutex_init(&qemu_global_mutex);
914
915 qemu_thread_get_self(&io_thread);
916 }
917
918 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
919 {
920 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
921 }
922
923 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
924 {
925 if (kvm_destroy_vcpu(cpu) < 0) {
926 error_report("kvm_destroy_vcpu failed");
927 exit(EXIT_FAILURE);
928 }
929 }
930
/* Intentionally empty; called from deal_with_unplugged_cpus(). */
931 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
932 {
933 }
934
935 static void qemu_wait_io_event_common(CPUState *cpu)
936 {
937 if (cpu->stop) {
938 cpu->stop = false;
939 cpu->stopped = true;
940 qemu_cond_broadcast(&qemu_pause_cond);
941 }
942 process_queued_cpu_work(cpu);
943 cpu->thread_kicked = false;
944 }
945
946 static void qemu_tcg_wait_io_event(CPUState *cpu)
947 {
948 while (all_cpu_threads_idle()) {
949 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
950 }
951
952 while (iothread_requesting_mutex) {
953 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
954 }
955
956 CPU_FOREACH(cpu) {
957 qemu_wait_io_event_common(cpu);
958 }
959 }
960
961 static void qemu_kvm_wait_io_event(CPUState *cpu)
962 {
963 while (cpu_thread_is_idle(cpu)) {
964 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
965 }
966
967 qemu_kvm_eat_signals(cpu);
968 qemu_wait_io_event_common(cpu);
969 }
970
971 static void *qemu_kvm_cpu_thread_fn(void *arg)
972 {
973 CPUState *cpu = arg;
974 int r;
975
976 rcu_register_thread();
977
978 qemu_mutex_lock_iothread();
979 qemu_thread_get_self(cpu->thread);
980 cpu->thread_id = qemu_get_thread_id();
981 cpu->can_do_io = 1;
982 current_cpu = cpu;
983
984 r = kvm_init_vcpu(cpu);
985 if (r < 0) {
986 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
987 exit(1);
988 }
989
990 qemu_kvm_init_cpu_signals(cpu);
991
992 /* signal CPU creation */
993 cpu->created = true;
994 qemu_cond_signal(&qemu_cpu_cond);
995
996 do {
997 if (cpu_can_run(cpu)) {
998 r = kvm_cpu_exec(cpu);
999 if (r == EXCP_DEBUG) {
1000 cpu_handle_guest_debug(cpu);
1001 }
1002 }
1003 qemu_kvm_wait_io_event(cpu);
1004 } while (!cpu->unplug || cpu_can_run(cpu));
1005
1006 qemu_kvm_destroy_vcpu(cpu);
1007 cpu->created = false;
1008 qemu_cond_signal(&qemu_cpu_cond);
1009 qemu_mutex_unlock_iothread();
1010 return NULL;
1011 }
1012
1013 static void *qemu_dummy_cpu_thread_fn(void *arg)
1014 {
1015 #ifdef _WIN32
1016 fprintf(stderr, "qtest is not supported under Windows\n");
1017 exit(1);
1018 #else
1019 CPUState *cpu = arg;
1020 sigset_t waitset;
1021 int r;
1022
1023 rcu_register_thread();
1024
1025 qemu_mutex_lock_iothread();
1026 qemu_thread_get_self(cpu->thread);
1027 cpu->thread_id = qemu_get_thread_id();
1028 cpu->can_do_io = 1;
1029
1030 sigemptyset(&waitset);
1031 sigaddset(&waitset, SIG_IPI);
1032
1033 /* signal CPU creation */
1034 cpu->created = true;
1035 qemu_cond_signal(&qemu_cpu_cond);
1036
1037 current_cpu = cpu;
1038 while (1) {
1039 current_cpu = NULL;
1040 qemu_mutex_unlock_iothread();
1041 do {
1042 int sig;
1043 r = sigwait(&waitset, &sig);
1044 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1045 if (r == -1) {
1046 perror("sigwait");
1047 exit(1);
1048 }
1049 qemu_mutex_lock_iothread();
1050 current_cpu = cpu;
1051 qemu_wait_io_event_common(cpu);
1052 }
1053
1054 return NULL;
1055 #endif
1056 }
1057
1058 static int64_t tcg_get_icount_limit(void)
1059 {
1060 int64_t deadline;
1061
1062 if (replay_mode != REPLAY_MODE_PLAY) {
1063 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1064
1065 /* Maintain prior (possibly buggy) behaviour where if no deadline
1066 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1067 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1068 * nanoseconds.
1069 */
1070 if ((deadline < 0) || (deadline > INT32_MAX)) {
1071 deadline = INT32_MAX;
1072 }
1073
1074 return qemu_icount_round(deadline);
1075 } else {
1076 return replay_get_instructions();
1077 }
1078 }
1079
1080 static void handle_icount_deadline(void)
1081 {
1082 if (use_icount) {
1083 int64_t deadline =
1084 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1085
1086 if (deadline == 0) {
1087 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1088 }
1089 }
1090 }
1091
1092 static int tcg_cpu_exec(CPUState *cpu)
1093 {
1094 int ret;
1095 #ifdef CONFIG_PROFILER
1096 int64_t ti;
1097 #endif
1098
1099 #ifdef CONFIG_PROFILER
1100 ti = profile_getclock();
1101 #endif
1102 if (use_icount) {
1103 int64_t count;
1104 int decr;
1105 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1106 + cpu->icount_extra);
1107 cpu->icount_decr.u16.low = 0;
1108 cpu->icount_extra = 0;
1109 count = tcg_get_icount_limit();
1110 timers_state.qemu_icount += count;
1111 decr = (count > 0xffff) ? 0xffff : count;
1112 count -= decr;
1113 cpu->icount_decr.u16.low = decr;
1114 cpu->icount_extra = count;
1115 }
1116 cpu_exec_start(cpu);
1117 ret = cpu_exec(cpu);
1118 cpu_exec_end(cpu);
1119 #ifdef CONFIG_PROFILER
1120 tcg_time += profile_getclock() - ti;
1121 #endif
1122 if (use_icount) {
1123 /* Fold pending instructions back into the
1124 instruction counter, and clear the interrupt flag. */
1125 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1126 + cpu->icount_extra);
1127 cpu->icount_decr.u32 = 0;
1128 cpu->icount_extra = 0;
1129 replay_account_executed_instructions();
1130 }
1131 return ret;
1132 }
1133
1134 /* Destroy any remaining vCPUs which have been unplugged and have
1135 * finished running
1136 */
1137 static void deal_with_unplugged_cpus(void)
1138 {
1139 CPUState *cpu;
1140
1141 CPU_FOREACH(cpu) {
1142 if (cpu->unplug && !cpu_can_run(cpu)) {
1143 qemu_tcg_destroy_vcpu(cpu);
1144 cpu->created = false;
1145 qemu_cond_signal(&qemu_cpu_cond);
1146 break;
1147 }
1148 }
1149 }
1150
/* Thread entry point for TCG; @arg is a CPUState.
 *
 * This one thread marks every vCPU in the CPU list as created and then
 * services all of them: the main loop walks the list with CPU_NEXT(),
 * running each runnable vCPU via tcg_cpu_exec().  The walk is interrupted by
 * exit_request, by a debug exception, or by a vCPU with a stop pending.
 * Runs with the iothread lock taken at entry; never leaves its main loop.
 */
1151 static void *qemu_tcg_cpu_thread_fn(void *arg)
1152 {
1153 CPUState *cpu = arg;
1154
1155 rcu_register_thread();
1156
1157 qemu_mutex_lock_iothread();
1158 qemu_thread_get_self(cpu->thread);
1159
/* The same thread id is recorded for every vCPU. */
1160 CPU_FOREACH(cpu) {
1161 cpu->thread_id = qemu_get_thread_id();
1162 cpu->created = true;
1163 cpu->can_do_io = 1;
1164 }
1165 qemu_cond_signal(&qemu_cpu_cond);
1166
1167 /* wait for initial kick-off after machine start */
1168 while (first_cpu->stopped) {
1169 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1170
1171 /* process any pending work */
1172 CPU_FOREACH(cpu) {
1173 qemu_wait_io_event_common(cpu);
1174 }
1175 }
1176
1177 /* Setting exit_request makes the first pass of the for loop below exit
 * immediately, so the wait/event handling at the bottom of the main loop
 * runs before any guest code. */
1178 atomic_mb_set(&exit_request, 1);
1179
1180 cpu = first_cpu;
1181
1182 while (1) {
1183 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1184 qemu_account_warp_timer();
1185
/* Wrap around to the first vCPU after reaching the end of the list. */
1186 if (!cpu) {
1187 cpu = first_cpu;
1188 }
1189
1190 for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
1191
/* QEMU_CLOCK_VIRTUAL is disabled while this vCPU single-steps with
 * SSTEP_NOTIMER set. */
1192 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1193 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1194
1195 if (cpu_can_run(cpu)) {
1196 int r;
1197 r = tcg_cpu_exec(cpu);
1198 if (r == EXCP_DEBUG) {
1199 cpu_handle_guest_debug(cpu);
1200 break;
1201 }
1202 } else if (cpu->stop || cpu->stopped) {
/* Step past an unplugged vCPU before leaving the loop, so the next
 * pass resumes at the vCPU after it. */
1203 if (cpu->unplug) {
1204 cpu = CPU_NEXT(cpu);
1205 }
1206 break;
1207 }
1208
1209 } /* for cpu.. */
1210
1211 /* Pairs with smp_wmb in qemu_cpu_kick. */
1212 atomic_mb_set(&exit_request, 0);
1213
1214 handle_icount_deadline();
1215
1216 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1217 deal_with_unplugged_cpus();
1218 }
1219
1220 return NULL;
1221 }
1222
1223 static void qemu_cpu_kick_thread(CPUState *cpu)
1224 {
1225 #ifndef _WIN32
1226 int err;
1227
1228 if (cpu->thread_kicked) {
1229 return;
1230 }
1231 cpu->thread_kicked = true;
1232 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1233 if (err) {
1234 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1235 exit(1);
1236 }
1237 #else /* _WIN32 */
1238 abort();
1239 #endif
1240 }
1241
1242 static void qemu_cpu_kick_no_halt(void)
1243 {
1244 CPUState *cpu;
1245 /* Ensure whatever caused the exit has reached the CPU threads before
1246 * writing exit_request.
1247 */
1248 atomic_mb_set(&exit_request, 1);
1249 cpu = atomic_mb_read(&tcg_current_cpu);
1250 if (cpu) {
1251 cpu_exit(cpu);
1252 }
1253 }
1254
1255 void qemu_cpu_kick(CPUState *cpu)
1256 {
1257 qemu_cond_broadcast(cpu->halt_cond);
1258 if (tcg_enabled()) {
1259 qemu_cpu_kick_no_halt();
1260 } else {
1261 qemu_cpu_kick_thread(cpu);
1262 }
1263 }
1264
1265 void qemu_cpu_kick_self(void)
1266 {
1267 assert(current_cpu);
1268 qemu_cpu_kick_thread(current_cpu);
1269 }
1270
1271 bool qemu_cpu_is_self(CPUState *cpu)
1272 {
1273 return qemu_thread_is_self(cpu->thread);
1274 }
1275
1276 bool qemu_in_vcpu_thread(void)
1277 {
1278 return current_cpu && qemu_cpu_is_self(current_cpu);
1279 }
1280
/* Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().  Static storage starts out as false.
 */
static __thread bool iothread_locked;

/* Report whether the calling thread has taken the iothread lock through
 * qemu_mutex_lock_iothread() and not yet released it.
 */
bool qemu_mutex_iothread_locked(void)
{
    bool locked = iothread_locked;

    return locked;
}
1287
1288 void qemu_mutex_lock_iothread(void)
1289 {
1290 atomic_inc(&iothread_requesting_mutex);
1291 /* In the simple case there is no need to bump the VCPU thread out of
1292 * TCG code execution.
1293 */
1294 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1295 !first_cpu || !first_cpu->created) {
1296 qemu_mutex_lock(&qemu_global_mutex);
1297 atomic_dec(&iothread_requesting_mutex);
1298 } else {
1299 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1300 qemu_cpu_kick_no_halt();
1301 qemu_mutex_lock(&qemu_global_mutex);
1302 }
1303 atomic_dec(&iothread_requesting_mutex);
1304 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1305 }
1306 iothread_locked = true;
1307 }
1308
1309 void qemu_mutex_unlock_iothread(void)
1310 {
1311 iothread_locked = false;
1312 qemu_mutex_unlock(&qemu_global_mutex);
1313 }
1314
1315 static bool all_vcpus_paused(void)
1316 {
1317 CPUState *cpu;
1318
1319 CPU_FOREACH(cpu) {
1320 if (!cpu->stopped) {
1321 return false;
1322 }
1323 }
1324
1325 return true;
1326 }
1327
1328 void pause_all_vcpus(void)
1329 {
1330 CPUState *cpu;
1331
1332 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1333 CPU_FOREACH(cpu) {
1334 cpu->stop = true;
1335 qemu_cpu_kick(cpu);
1336 }
1337
1338 if (qemu_in_vcpu_thread()) {
1339 cpu_stop_current();
1340 if (!kvm_enabled()) {
1341 CPU_FOREACH(cpu) {
1342 cpu->stop = false;
1343 cpu->stopped = true;
1344 }
1345 return;
1346 }
1347 }
1348
1349 while (!all_vcpus_paused()) {
1350 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1351 CPU_FOREACH(cpu) {
1352 qemu_cpu_kick(cpu);
1353 }
1354 }
1355 }
1356
1357 void cpu_resume(CPUState *cpu)
1358 {
1359 cpu->stop = false;
1360 cpu->stopped = false;
1361 qemu_cpu_kick(cpu);
1362 }
1363
1364 void resume_all_vcpus(void)
1365 {
1366 CPUState *cpu;
1367
1368 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1369 CPU_FOREACH(cpu) {
1370 cpu_resume(cpu);
1371 }
1372 }
1373
1374 void cpu_remove(CPUState *cpu)
1375 {
1376 cpu->stop = true;
1377 cpu->unplug = true;
1378 qemu_cpu_kick(cpu);
1379 }
1380
1381 void cpu_remove_sync(CPUState *cpu)
1382 {
1383 cpu_remove(cpu);
1384 while (cpu->created) {
1385 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1386 }
1387 }
1388
1389 /* For temporary buffers for forming a name */
1390 #define VCPU_THREAD_NAME_SIZE 16
1391
1392 static void qemu_tcg_init_vcpu(CPUState *cpu)
1393 {
1394 char thread_name[VCPU_THREAD_NAME_SIZE];
1395 static QemuCond *tcg_halt_cond;
1396 static QemuThread *tcg_cpu_thread;
1397
1398 /* share a single thread for all cpus with TCG */
1399 if (!tcg_cpu_thread) {
1400 cpu->thread = g_malloc0(sizeof(QemuThread));
1401 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1402 qemu_cond_init(cpu->halt_cond);
1403 tcg_halt_cond = cpu->halt_cond;
1404 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1405 cpu->cpu_index);
1406 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1407 cpu, QEMU_THREAD_JOINABLE);
1408 #ifdef _WIN32
1409 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1410 #endif
1411 while (!cpu->created) {
1412 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1413 }
1414 tcg_cpu_thread = cpu->thread;
1415 } else {
1416 cpu->thread = tcg_cpu_thread;
1417 cpu->halt_cond = tcg_halt_cond;
1418 }
1419 }
1420
1421 static void qemu_kvm_start_vcpu(CPUState *cpu)
1422 {
1423 char thread_name[VCPU_THREAD_NAME_SIZE];
1424
1425 cpu->thread = g_malloc0(sizeof(QemuThread));
1426 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1427 qemu_cond_init(cpu->halt_cond);
1428 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1429 cpu->cpu_index);
1430 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1431 cpu, QEMU_THREAD_JOINABLE);
1432 while (!cpu->created) {
1433 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1434 }
1435 }
1436
1437 static void qemu_dummy_start_vcpu(CPUState *cpu)
1438 {
1439 char thread_name[VCPU_THREAD_NAME_SIZE];
1440
1441 cpu->thread = g_malloc0(sizeof(QemuThread));
1442 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1443 qemu_cond_init(cpu->halt_cond);
1444 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1445 cpu->cpu_index);
1446 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1447 QEMU_THREAD_JOINABLE);
1448 while (!cpu->created) {
1449 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1450 }
1451 }
1452
1453 void qemu_init_vcpu(CPUState *cpu)
1454 {
1455 cpu->nr_cores = smp_cores;
1456 cpu->nr_threads = smp_threads;
1457 cpu->stopped = true;
1458
1459 if (!cpu->as) {
1460 /* If the target cpu hasn't set up any address spaces itself,
1461 * give it the default one.
1462 */
1463 AddressSpace *as = address_space_init_shareable(cpu->memory,
1464 "cpu-memory");
1465 cpu->num_ases = 1;
1466 cpu_address_space_init(cpu, as, 0);
1467 }
1468
1469 if (kvm_enabled()) {
1470 qemu_kvm_start_vcpu(cpu);
1471 } else if (tcg_enabled()) {
1472 qemu_tcg_init_vcpu(cpu);
1473 } else {
1474 qemu_dummy_start_vcpu(cpu);
1475 }
1476 }
1477
1478 void cpu_stop_current(void)
1479 {
1480 if (current_cpu) {
1481 current_cpu->stop = false;
1482 current_cpu->stopped = true;
1483 cpu_exit(current_cpu);
1484 qemu_cond_broadcast(&qemu_pause_cond);
1485 }
1486 }
1487
1488 int vm_stop(RunState state)
1489 {
1490 if (qemu_in_vcpu_thread()) {
1491 qemu_system_vmstop_request_prepare();
1492 qemu_system_vmstop_request(state);
1493 /*
1494 * FIXME: should not return to device code in case
1495 * vm_stop() has been requested.
1496 */
1497 cpu_stop_current();
1498 return 0;
1499 }
1500
1501 return do_vm_stop(state);
1502 }
1503
1504 /* does a state transition even if the VM is already stopped,
1505 current state is forgotten forever */
1506 int vm_stop_force_state(RunState state)
1507 {
1508 if (runstate_is_running()) {
1509 return vm_stop(state);
1510 } else {
1511 runstate_set(state);
1512
1513 bdrv_drain_all();
1514 /* Make sure to return an error if the flush in a previous vm_stop()
1515 * failed. */
1516 return bdrv_flush_all();
1517 }
1518 }
1519
1520 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1521 {
1522 /* XXX: implement xxx_cpu_list for targets that still miss it */
1523 #if defined(cpu_list)
1524 cpu_list(f, cpu_fprintf);
1525 #endif
1526 }
1527
1528 CpuInfoList *qmp_query_cpus(Error **errp)
1529 {
1530 CpuInfoList *head = NULL, *cur_item = NULL;
1531 CPUState *cpu;
1532
1533 CPU_FOREACH(cpu) {
1534 CpuInfoList *info;
1535 #if defined(TARGET_I386)
1536 X86CPU *x86_cpu = X86_CPU(cpu);
1537 CPUX86State *env = &x86_cpu->env;
1538 #elif defined(TARGET_PPC)
1539 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1540 CPUPPCState *env = &ppc_cpu->env;
1541 #elif defined(TARGET_SPARC)
1542 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1543 CPUSPARCState *env = &sparc_cpu->env;
1544 #elif defined(TARGET_MIPS)
1545 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1546 CPUMIPSState *env = &mips_cpu->env;
1547 #elif defined(TARGET_TRICORE)
1548 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1549 CPUTriCoreState *env = &tricore_cpu->env;
1550 #endif
1551
1552 cpu_synchronize_state(cpu);
1553
1554 info = g_malloc0(sizeof(*info));
1555 info->value = g_malloc0(sizeof(*info->value));
1556 info->value->CPU = cpu->cpu_index;
1557 info->value->current = (cpu == first_cpu);
1558 info->value->halted = cpu->halted;
1559 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1560 info->value->thread_id = cpu->thread_id;
1561 #if defined(TARGET_I386)
1562 info->value->arch = CPU_INFO_ARCH_X86;
1563 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1564 #elif defined(TARGET_PPC)
1565 info->value->arch = CPU_INFO_ARCH_PPC;
1566 info->value->u.ppc.nip = env->nip;
1567 #elif defined(TARGET_SPARC)
1568 info->value->arch = CPU_INFO_ARCH_SPARC;
1569 info->value->u.q_sparc.pc = env->pc;
1570 info->value->u.q_sparc.npc = env->npc;
1571 #elif defined(TARGET_MIPS)
1572 info->value->arch = CPU_INFO_ARCH_MIPS;
1573 info->value->u.q_mips.PC = env->active_tc.PC;
1574 #elif defined(TARGET_TRICORE)
1575 info->value->arch = CPU_INFO_ARCH_TRICORE;
1576 info->value->u.tricore.PC = env->PC;
1577 #else
1578 info->value->arch = CPU_INFO_ARCH_OTHER;
1579 #endif
1580
1581 /* XXX: waiting for the qapi to support GSList */
1582 if (!cur_item) {
1583 head = cur_item = info;
1584 } else {
1585 cur_item->next = info;
1586 cur_item = info;
1587 }
1588 }
1589
1590 return head;
1591 }
1592
1593 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1594 bool has_cpu, int64_t cpu_index, Error **errp)
1595 {
1596 FILE *f;
1597 uint32_t l;
1598 CPUState *cpu;
1599 uint8_t buf[1024];
1600 int64_t orig_addr = addr, orig_size = size;
1601
1602 if (!has_cpu) {
1603 cpu_index = 0;
1604 }
1605
1606 cpu = qemu_get_cpu(cpu_index);
1607 if (cpu == NULL) {
1608 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1609 "a CPU number");
1610 return;
1611 }
1612
1613 f = fopen(filename, "wb");
1614 if (!f) {
1615 error_setg_file_open(errp, errno, filename);
1616 return;
1617 }
1618
1619 while (size != 0) {
1620 l = sizeof(buf);
1621 if (l > size)
1622 l = size;
1623 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1624 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1625 " specified", orig_addr, orig_size);
1626 goto exit;
1627 }
1628 if (fwrite(buf, 1, l, f) != l) {
1629 error_setg(errp, QERR_IO_ERROR);
1630 goto exit;
1631 }
1632 addr += l;
1633 size -= l;
1634 }
1635
1636 exit:
1637 fclose(f);
1638 }
1639
1640 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1641 Error **errp)
1642 {
1643 FILE *f;
1644 uint32_t l;
1645 uint8_t buf[1024];
1646
1647 f = fopen(filename, "wb");
1648 if (!f) {
1649 error_setg_file_open(errp, errno, filename);
1650 return;
1651 }
1652
1653 while (size != 0) {
1654 l = sizeof(buf);
1655 if (l > size)
1656 l = size;
1657 cpu_physical_memory_read(addr, buf, l);
1658 if (fwrite(buf, 1, l, f) != l) {
1659 error_setg(errp, QERR_IO_ERROR);
1660 goto exit;
1661 }
1662 addr += l;
1663 size -= l;
1664 }
1665
1666 exit:
1667 fclose(f);
1668 }
1669
1670 void qmp_inject_nmi(Error **errp)
1671 {
1672 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1673 }
1674
1675 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1676 {
1677 if (!use_icount) {
1678 return;
1679 }
1680
1681 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1682 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1683 if (icount_align_option) {
1684 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1685 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1686 } else {
1687 cpu_fprintf(f, "Max guest delay NA\n");
1688 cpu_fprintf(f, "Max guest advance NA\n");
1689 }
1690 }