block: User BdrvChild callback for device name
[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "qemu/error-report.h"
31 #include "sysemu/sysemu.h"
32 #include "sysemu/block-backend.h"
33 #include "exec/gdbstub.h"
34 #include "sysemu/dma.h"
35 #include "sysemu/kvm.h"
36 #include "qmp-commands.h"
37
38 #include "qemu/thread.h"
39 #include "sysemu/cpus.h"
40 #include "sysemu/qtest.h"
41 #include "qemu/main-loop.h"
42 #include "qemu/bitmap.h"
43 #include "qemu/seqlock.h"
44 #include "qapi-event.h"
45 #include "hw/nmi.h"
46 #include "sysemu/replay.h"
47
48 #ifndef _WIN32
49 #include "qemu/compatfd.h"
50 #endif
51
52 #ifdef CONFIG_LINUX
53
54 #include <sys/prctl.h>
55
56 #ifndef PR_MCE_KILL
57 #define PR_MCE_KILL 33
58 #endif
59
60 #ifndef PR_MCE_KILL_SET
61 #define PR_MCE_KILL_SET 1
62 #endif
63
64 #ifndef PR_MCE_KILL_EARLY
65 #define PR_MCE_KILL_EARLY 1
66 #endif
67
68 #endif /* CONFIG_LINUX */
69
/* NOTE(review): next_cpu is not referenced in the visible part of this file;
 * presumably it is consumed by the TCG execution loop further down - confirm. */
static CPUState *next_cpu;
/* Not static, so visible to other translation units. */
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
/* Timer created in cpu_ticks_init() with cpu_throttle_timer_tick() as callback. */
static QEMUTimer *throttle_timer;
/* Current throttle percentage; 0 means throttling is inactive. */
static unsigned int throttle_percentage;

/* Bounds that cpu_throttle_set() clamps its argument to. */
#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
/* Base throttling timeslice in nanoseconds (10 ms). */
#define CPU_THROTTLE_TIMESLICE_NS 10000000
81
82 bool cpu_is_stopped(CPUState *cpu)
83 {
84 return cpu->stopped || !runstate_is_running();
85 }
86
87 static bool cpu_thread_is_idle(CPUState *cpu)
88 {
89 if (cpu->stop || cpu->queued_work_first) {
90 return false;
91 }
92 if (cpu_is_stopped(cpu)) {
93 return true;
94 }
95 if (!cpu->halted || cpu_has_work(cpu) ||
96 kvm_halt_in_kernel()) {
97 return false;
98 }
99 return true;
100 }
101
102 static bool all_cpu_threads_idle(void)
103 {
104 CPUState *cpu;
105
106 CPU_FOREACH(cpu) {
107 if (!cpu_thread_is_idle(cpu)) {
108 return false;
109 }
110 }
111 return true;
112 }
113
/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

/* Set from the "sleep" option in configure_icount(); defaults to true. */
static bool icount_sleep = true;
/* QEMU_CLOCK_VIRTUAL_RT time at which the current warp started,
 * or -1 when no warp is pending. */
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

/* Timers driving icount_adjust_rt(), icount_adjust_vm() and
 * icount_timer_cb() respectively; created in configure_icount(). */
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
129
/* Bookkeeping for the guest-visible tick counter, the virtual clock and
 * the instruction counter. */
typedef struct TimersState {
    /* Protected by BQL. */
    /* Last value returned by cpu_get_ticks(); used to keep it monotonic. */
    int64_t cpu_ticks_prev;
    /* Offset added to the host tick counter by cpu_get_ticks(). */
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    /* Offset added to get_clock() by cpu_get_clock_locked(). */
    int64_t cpu_clock_offset;
    /* Non-zero between cpu_enable_ticks() and cpu_disable_ticks(). */
    int32_t cpu_ticks_enabled;
    /* Not used by the code here, but listed in the vmstate_timers field
     * table, so it is part of the migrated state. */
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

/* The single instance; registered for migration in cpu_ticks_init(). */
static TimersState timers_state;
150
151 int64_t cpu_get_icount_raw(void)
152 {
153 int64_t icount;
154 CPUState *cpu = current_cpu;
155
156 icount = timers_state.qemu_icount;
157 if (cpu) {
158 if (!cpu->can_do_io) {
159 fprintf(stderr, "Bad icount read\n");
160 exit(1);
161 }
162 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
163 }
164 return icount;
165 }
166
167 /* Return the virtual CPU time, based on the instruction counter. */
168 static int64_t cpu_get_icount_locked(void)
169 {
170 int64_t icount = cpu_get_icount_raw();
171 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
172 }
173
174 int64_t cpu_get_icount(void)
175 {
176 int64_t icount;
177 unsigned start;
178
179 do {
180 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
181 icount = cpu_get_icount_locked();
182 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
183
184 return icount;
185 }
186
/* Convert an instruction count to virtual-clock nanoseconds by scaling it
 * with 2^icount_time_shift. */
int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
191
192 /* return the host CPU cycle counter and handle stop/restart */
193 /* Caller must hold the BQL */
194 int64_t cpu_get_ticks(void)
195 {
196 int64_t ticks;
197
198 if (use_icount) {
199 return cpu_get_icount();
200 }
201
202 ticks = timers_state.cpu_ticks_offset;
203 if (timers_state.cpu_ticks_enabled) {
204 ticks += cpu_get_host_ticks();
205 }
206
207 if (timers_state.cpu_ticks_prev > ticks) {
208 /* Note: non increasing ticks may happen if the host uses
209 software suspend */
210 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
211 ticks = timers_state.cpu_ticks_prev;
212 }
213
214 timers_state.cpu_ticks_prev = ticks;
215 return ticks;
216 }
217
218 static int64_t cpu_get_clock_locked(void)
219 {
220 int64_t ticks;
221
222 ticks = timers_state.cpu_clock_offset;
223 if (timers_state.cpu_ticks_enabled) {
224 ticks += get_clock();
225 }
226
227 return ticks;
228 }
229
230 /* return the host CPU monotonic timer and handle stop/restart */
231 int64_t cpu_get_clock(void)
232 {
233 int64_t ti;
234 unsigned start;
235
236 do {
237 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
238 ti = cpu_get_clock_locked();
239 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
240
241 return ti;
242 }
243
244 /* enable cpu_get_ticks()
245 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
246 */
247 void cpu_enable_ticks(void)
248 {
249 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
250 seqlock_write_lock(&timers_state.vm_clock_seqlock);
251 if (!timers_state.cpu_ticks_enabled) {
252 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
253 timers_state.cpu_clock_offset -= get_clock();
254 timers_state.cpu_ticks_enabled = 1;
255 }
256 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
257 }
258
259 /* disable cpu_get_ticks() : the clock is stopped. You must not call
260 * cpu_get_ticks() after that.
261 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
262 */
263 void cpu_disable_ticks(void)
264 {
265 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
266 seqlock_write_lock(&timers_state.vm_clock_seqlock);
267 if (timers_state.cpu_ticks_enabled) {
268 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
269 timers_state.cpu_clock_offset = cpu_get_clock_locked();
270 timers_state.cpu_ticks_enabled = 0;
271 }
272 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
273 }
274
275 /* Correlation between real and virtual time is always going to be
276 fairly approximate, so ignore small variation.
277 When the guest is idle real and virtual time will be aligned in
278 the IO wait loop. */
279 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
280
281 static void icount_adjust(void)
282 {
283 int64_t cur_time;
284 int64_t cur_icount;
285 int64_t delta;
286
287 /* Protected by TimersState mutex. */
288 static int64_t last_delta;
289
290 /* If the VM is not running, then do nothing. */
291 if (!runstate_is_running()) {
292 return;
293 }
294
295 seqlock_write_lock(&timers_state.vm_clock_seqlock);
296 cur_time = cpu_get_clock_locked();
297 cur_icount = cpu_get_icount_locked();
298
299 delta = cur_icount - cur_time;
300 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
301 if (delta > 0
302 && last_delta + ICOUNT_WOBBLE < delta * 2
303 && icount_time_shift > 0) {
304 /* The guest is getting too far ahead. Slow time down. */
305 icount_time_shift--;
306 }
307 if (delta < 0
308 && last_delta - ICOUNT_WOBBLE > delta * 2
309 && icount_time_shift < MAX_ICOUNT_SHIFT) {
310 /* The guest is getting too far behind. Speed time up. */
311 icount_time_shift++;
312 }
313 last_delta = delta;
314 timers_state.qemu_icount_bias = cur_icount
315 - (timers_state.qemu_icount << icount_time_shift);
316 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
317 }
318
319 static void icount_adjust_rt(void *opaque)
320 {
321 timer_mod(icount_rt_timer,
322 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
323 icount_adjust();
324 }
325
326 static void icount_adjust_vm(void *opaque)
327 {
328 timer_mod(icount_vm_timer,
329 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
330 NANOSECONDS_PER_SECOND / 10);
331 icount_adjust();
332 }
333
334 static int64_t qemu_icount_round(int64_t count)
335 {
336 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
337 }
338
339 static void icount_warp_rt(void)
340 {
341 unsigned seq;
342 int64_t warp_start;
343
344 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
345 * changes from -1 to another value, so the race here is okay.
346 */
347 do {
348 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
349 warp_start = vm_clock_warp_start;
350 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
351
352 if (warp_start == -1) {
353 return;
354 }
355
356 seqlock_write_lock(&timers_state.vm_clock_seqlock);
357 if (runstate_is_running()) {
358 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
359 cpu_get_clock_locked());
360 int64_t warp_delta;
361
362 warp_delta = clock - vm_clock_warp_start;
363 if (use_icount == 2) {
364 /*
365 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
366 * far ahead of real time.
367 */
368 int64_t cur_icount = cpu_get_icount_locked();
369 int64_t delta = clock - cur_icount;
370 warp_delta = MIN(warp_delta, delta);
371 }
372 timers_state.qemu_icount_bias += warp_delta;
373 }
374 vm_clock_warp_start = -1;
375 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
376
377 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
378 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
379 }
380 }
381
/* Callback of icount_warp_timer (see configure_icount()); @opaque is
 * unused. Simply accounts the pending warp. */
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
389
390 void qtest_clock_warp(int64_t dest)
391 {
392 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
393 AioContext *aio_context;
394 assert(qtest_enabled());
395 aio_context = qemu_get_aio_context();
396 while (clock < dest) {
397 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
398 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
399
400 seqlock_write_lock(&timers_state.vm_clock_seqlock);
401 timers_state.qemu_icount_bias += warp;
402 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
403
404 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
405 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
406 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
407 }
408 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
409 }
410
411 void qemu_start_warp_timer(void)
412 {
413 int64_t clock;
414 int64_t deadline;
415
416 if (!use_icount) {
417 return;
418 }
419
420 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
421 * do not fire, so computing the deadline does not make sense.
422 */
423 if (!runstate_is_running()) {
424 return;
425 }
426
427 /* warp clock deterministically in record/replay mode */
428 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
429 return;
430 }
431
432 if (!all_cpu_threads_idle()) {
433 return;
434 }
435
436 if (qtest_enabled()) {
437 /* When testing, qtest commands advance icount. */
438 return;
439 }
440
441 /* We want to use the earliest deadline from ALL vm_clocks */
442 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
443 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
444 if (deadline < 0) {
445 static bool notified;
446 if (!icount_sleep && !notified) {
447 error_report("WARNING: icount sleep disabled and no active timers");
448 notified = true;
449 }
450 return;
451 }
452
453 if (deadline > 0) {
454 /*
455 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
456 * sleep. Otherwise, the CPU might be waiting for a future timer
457 * interrupt to wake it up, but the interrupt never comes because
458 * the vCPU isn't running any insns and thus doesn't advance the
459 * QEMU_CLOCK_VIRTUAL.
460 */
461 if (!icount_sleep) {
462 /*
463 * We never let VCPUs sleep in no sleep icount mode.
464 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
465 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
466 * It is useful when we want a deterministic execution time,
467 * isolated from host latencies.
468 */
469 seqlock_write_lock(&timers_state.vm_clock_seqlock);
470 timers_state.qemu_icount_bias += deadline;
471 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
472 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
473 } else {
474 /*
475 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
476 * "real" time, (related to the time left until the next event) has
477 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
478 * This avoids that the warps are visible externally; for example,
479 * you will not be sending network packets continuously instead of
480 * every 100ms.
481 */
482 seqlock_write_lock(&timers_state.vm_clock_seqlock);
483 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
484 vm_clock_warp_start = clock;
485 }
486 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
487 timer_mod_anticipate(icount_warp_timer, clock + deadline);
488 }
489 } else if (deadline == 0) {
490 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
491 }
492 }
493
494 static void qemu_account_warp_timer(void)
495 {
496 if (!use_icount || !icount_sleep) {
497 return;
498 }
499
500 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
501 * do not fire, so computing the deadline does not make sense.
502 */
503 if (!runstate_is_running()) {
504 return;
505 }
506
507 /* warp clock deterministically in record/replay mode */
508 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
509 return;
510 }
511
512 timer_del(icount_warp_timer);
513 icount_warp_rt();
514 }
515
516 static bool icount_state_needed(void *opaque)
517 {
518 return use_icount;
519 }
520
/*
 * This is a subsection for icount migration.
 * It is only sent when icount_state_needed() returns true and carries the
 * icount bias and the instruction counter.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
535
/* Migration description of TimersState; registered in cpu_ticks_init().
 * cpu_clock_offset is tagged with version 2; the icount state travels in
 * an optional subsection. */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
551
552 static void cpu_throttle_thread(void *opaque)
553 {
554 CPUState *cpu = opaque;
555 double pct;
556 double throttle_ratio;
557 long sleeptime_ns;
558
559 if (!cpu_throttle_get_percentage()) {
560 return;
561 }
562
563 pct = (double)cpu_throttle_get_percentage()/100;
564 throttle_ratio = pct / (1 - pct);
565 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
566
567 qemu_mutex_unlock_iothread();
568 atomic_set(&cpu->throttle_thread_scheduled, 0);
569 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
570 qemu_mutex_lock_iothread();
571 }
572
573 static void cpu_throttle_timer_tick(void *opaque)
574 {
575 CPUState *cpu;
576 double pct;
577
578 /* Stop the timer if needed */
579 if (!cpu_throttle_get_percentage()) {
580 return;
581 }
582 CPU_FOREACH(cpu) {
583 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
584 async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
585 }
586 }
587
588 pct = (double)cpu_throttle_get_percentage()/100;
589 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
590 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
591 }
592
593 void cpu_throttle_set(int new_throttle_pct)
594 {
595 /* Ensure throttle percentage is within valid range */
596 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
597 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
598
599 atomic_set(&throttle_percentage, new_throttle_pct);
600
601 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
602 CPU_THROTTLE_TIMESLICE_NS);
603 }
604
/* Turn throttling off by zeroing the percentage. The timer is not touched
 * here; cpu_throttle_timer_tick() returns without re-arming once it sees
 * a zero percentage. */
void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}
609
/* True while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int percent = cpu_throttle_get_percentage();

    return percent != 0;
}
614
/* Atomically read the current throttle percentage (0 when throttling is
 * off). */
int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}
619
/* One-time setup of the timer state: initialise the seqlock, register
 * timers_state for migration and create (but do not arm) the throttle
 * timer. */
void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}
627
628 void configure_icount(QemuOpts *opts, Error **errp)
629 {
630 const char *option;
631 char *rem_str = NULL;
632
633 option = qemu_opt_get(opts, "shift");
634 if (!option) {
635 if (qemu_opt_get(opts, "align") != NULL) {
636 error_setg(errp, "Please specify shift option when using align");
637 }
638 return;
639 }
640
641 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
642 if (icount_sleep) {
643 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
644 icount_timer_cb, NULL);
645 }
646
647 icount_align_option = qemu_opt_get_bool(opts, "align", false);
648
649 if (icount_align_option && !icount_sleep) {
650 error_setg(errp, "align=on and sleep=off are incompatible");
651 }
652 if (strcmp(option, "auto") != 0) {
653 errno = 0;
654 icount_time_shift = strtol(option, &rem_str, 0);
655 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
656 error_setg(errp, "icount: Invalid shift value");
657 }
658 use_icount = 1;
659 return;
660 } else if (icount_align_option) {
661 error_setg(errp, "shift=auto and align=on are incompatible");
662 } else if (!icount_sleep) {
663 error_setg(errp, "shift=auto and sleep=off are incompatible");
664 }
665
666 use_icount = 2;
667
668 /* 125MIPS seems a reasonable initial guess at the guest speed.
669 It will be corrected fairly quickly anyway. */
670 icount_time_shift = 3;
671
672 /* Have both realtime and virtual time triggers for speed adjustment.
673 The realtime trigger catches emulated time passing too slowly,
674 the virtual time trigger catches emulated time passing too fast.
675 Realtime triggers occur even when idle, so use them less frequently
676 than VM triggers. */
677 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
678 icount_adjust_rt, NULL);
679 timer_mod(icount_rt_timer,
680 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
681 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
682 icount_adjust_vm, NULL);
683 timer_mod(icount_vm_timer,
684 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
685 NANOSECONDS_PER_SECOND / 10);
686 }
687
688 /***********************************************************/
689 void hw_error(const char *fmt, ...)
690 {
691 va_list ap;
692 CPUState *cpu;
693
694 va_start(ap, fmt);
695 fprintf(stderr, "qemu: hardware error: ");
696 vfprintf(stderr, fmt, ap);
697 fprintf(stderr, "\n");
698 CPU_FOREACH(cpu) {
699 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
700 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
701 }
702 va_end(ap);
703 abort();
704 }
705
706 void cpu_synchronize_all_states(void)
707 {
708 CPUState *cpu;
709
710 CPU_FOREACH(cpu) {
711 cpu_synchronize_state(cpu);
712 }
713 }
714
715 void cpu_synchronize_all_post_reset(void)
716 {
717 CPUState *cpu;
718
719 CPU_FOREACH(cpu) {
720 cpu_synchronize_post_reset(cpu);
721 }
722 }
723
724 void cpu_synchronize_all_post_init(void)
725 {
726 CPUState *cpu;
727
728 CPU_FOREACH(cpu) {
729 cpu_synchronize_post_init(cpu);
730 }
731 }
732
733 static int do_vm_stop(RunState state)
734 {
735 int ret = 0;
736
737 if (runstate_is_running()) {
738 cpu_disable_ticks();
739 pause_all_vcpus();
740 runstate_set(state);
741 vm_state_notify(0, state);
742 qapi_event_send_stop(&error_abort);
743 }
744
745 bdrv_drain_all();
746 ret = blk_flush_all();
747
748 return ret;
749 }
750
751 static bool cpu_can_run(CPUState *cpu)
752 {
753 if (cpu->stop) {
754 return false;
755 }
756 if (cpu_is_stopped(cpu)) {
757 return false;
758 }
759 return true;
760 }
761
/* Called when a vCPU's execution loop returned EXCP_DEBUG: tell the
 * gdbstub which CPU stopped, raise a system debug request and mark the
 * vCPU as stopped. */
static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}
768
769 #ifdef CONFIG_LINUX
770 static void sigbus_reraise(void)
771 {
772 sigset_t set;
773 struct sigaction action;
774
775 memset(&action, 0, sizeof(action));
776 action.sa_handler = SIG_DFL;
777 if (!sigaction(SIGBUS, &action, NULL)) {
778 raise(SIGBUS);
779 sigemptyset(&set);
780 sigaddset(&set, SIGBUS);
781 sigprocmask(SIG_UNBLOCK, &set, NULL);
782 }
783 perror("Failed to re-raise SIGBUS!\n");
784 abort();
785 }
786
787 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
788 void *ctx)
789 {
790 if (kvm_on_sigbus(siginfo->ssi_code,
791 (void *)(intptr_t)siginfo->ssi_addr)) {
792 sigbus_reraise();
793 }
794 }
795
796 static void qemu_init_sigbus(void)
797 {
798 struct sigaction action;
799
800 memset(&action, 0, sizeof(action));
801 action.sa_flags = SA_SIGINFO;
802 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
803 sigaction(SIGBUS, &action, NULL);
804
805 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
806 }
807
808 static void qemu_kvm_eat_signals(CPUState *cpu)
809 {
810 struct timespec ts = { 0, 0 };
811 siginfo_t siginfo;
812 sigset_t waitset;
813 sigset_t chkset;
814 int r;
815
816 sigemptyset(&waitset);
817 sigaddset(&waitset, SIG_IPI);
818 sigaddset(&waitset, SIGBUS);
819
820 do {
821 r = sigtimedwait(&waitset, &siginfo, &ts);
822 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
823 perror("sigtimedwait");
824 exit(1);
825 }
826
827 switch (r) {
828 case SIGBUS:
829 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
830 sigbus_reraise();
831 }
832 break;
833 default:
834 break;
835 }
836
837 r = sigpending(&chkset);
838 if (r == -1) {
839 perror("sigpending");
840 exit(1);
841 }
842 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
843 }
844
845 #else /* !CONFIG_LINUX */
846
/* Non-Linux build: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
}

/* Non-Linux build: there are no signals to consume. */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
854 #endif /* !CONFIG_LINUX */
855
856 #ifndef _WIN32
/* Do-nothing handler; installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
}
860
861 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
862 {
863 int r;
864 sigset_t set;
865 struct sigaction sigact;
866
867 memset(&sigact, 0, sizeof(sigact));
868 sigact.sa_handler = dummy_signal;
869 sigaction(SIG_IPI, &sigact, NULL);
870
871 pthread_sigmask(SIG_BLOCK, NULL, &set);
872 sigdelset(&set, SIG_IPI);
873 sigdelset(&set, SIGBUS);
874 r = kvm_set_signal_mask(cpu, &set);
875 if (r) {
876 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
877 exit(1);
878 }
879 }
880
881 #else /* _WIN32 */
/* Windows build: this must never be reached; abort if it is. */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
886 #endif /* _WIN32 */
887
/* The global mutex taken by qemu_mutex_lock_iothread(). */
static QemuMutex qemu_global_mutex;
/* Broadcast by qemu_mutex_lock_iothread() after a contended acquire;
 * the TCG thread waits on it while iothread_requesting_mutex is set. */
static QemuCond qemu_io_proceeded_cond;
/* Number of threads currently inside qemu_mutex_lock_iothread() that
 * have not obtained the mutex yet. */
static unsigned iothread_requesting_mutex;

/* Thread that called qemu_init_cpu_loop(). */
static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
/* Broadcast by flush_queued_work(); run_on_cpu() waits on it. */
static QemuCond qemu_work_cond;
899
900 void qemu_init_cpu_loop(void)
901 {
902 qemu_init_sigbus();
903 qemu_cond_init(&qemu_cpu_cond);
904 qemu_cond_init(&qemu_pause_cond);
905 qemu_cond_init(&qemu_work_cond);
906 qemu_cond_init(&qemu_io_proceeded_cond);
907 qemu_mutex_init(&qemu_global_mutex);
908
909 qemu_thread_get_self(&io_thread);
910 }
911
912 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
913 {
914 struct qemu_work_item wi;
915
916 if (qemu_cpu_is_self(cpu)) {
917 func(data);
918 return;
919 }
920
921 wi.func = func;
922 wi.data = data;
923 wi.free = false;
924
925 qemu_mutex_lock(&cpu->work_mutex);
926 if (cpu->queued_work_first == NULL) {
927 cpu->queued_work_first = &wi;
928 } else {
929 cpu->queued_work_last->next = &wi;
930 }
931 cpu->queued_work_last = &wi;
932 wi.next = NULL;
933 wi.done = false;
934 qemu_mutex_unlock(&cpu->work_mutex);
935
936 qemu_cpu_kick(cpu);
937 while (!atomic_mb_read(&wi.done)) {
938 CPUState *self_cpu = current_cpu;
939
940 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
941 current_cpu = self_cpu;
942 }
943 }
944
945 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
946 {
947 struct qemu_work_item *wi;
948
949 if (qemu_cpu_is_self(cpu)) {
950 func(data);
951 return;
952 }
953
954 wi = g_malloc0(sizeof(struct qemu_work_item));
955 wi->func = func;
956 wi->data = data;
957 wi->free = true;
958
959 qemu_mutex_lock(&cpu->work_mutex);
960 if (cpu->queued_work_first == NULL) {
961 cpu->queued_work_first = wi;
962 } else {
963 cpu->queued_work_last->next = wi;
964 }
965 cpu->queued_work_last = wi;
966 wi->next = NULL;
967 wi->done = false;
968 qemu_mutex_unlock(&cpu->work_mutex);
969
970 qemu_cpu_kick(cpu);
971 }
972
973 static void flush_queued_work(CPUState *cpu)
974 {
975 struct qemu_work_item *wi;
976
977 if (cpu->queued_work_first == NULL) {
978 return;
979 }
980
981 qemu_mutex_lock(&cpu->work_mutex);
982 while (cpu->queued_work_first != NULL) {
983 wi = cpu->queued_work_first;
984 cpu->queued_work_first = wi->next;
985 if (!cpu->queued_work_first) {
986 cpu->queued_work_last = NULL;
987 }
988 qemu_mutex_unlock(&cpu->work_mutex);
989 wi->func(wi->data);
990 qemu_mutex_lock(&cpu->work_mutex);
991 if (wi->free) {
992 g_free(wi);
993 } else {
994 atomic_mb_set(&wi->done, true);
995 }
996 }
997 qemu_mutex_unlock(&cpu->work_mutex);
998 qemu_cond_broadcast(&qemu_work_cond);
999 }
1000
/* Housekeeping shared by all vCPU thread flavours after a wait: honour a
 * pending stop request, run queued work, and clear the kicked flag. */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        /* Turn the request into the stopped state and wake anybody
         * waiting on qemu_pause_cond (e.g. pause_all_vcpus()). */
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
1011
1012 static void qemu_tcg_wait_io_event(CPUState *cpu)
1013 {
1014 while (all_cpu_threads_idle()) {
1015 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1016 }
1017
1018 while (iothread_requesting_mutex) {
1019 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
1020 }
1021
1022 CPU_FOREACH(cpu) {
1023 qemu_wait_io_event_common(cpu);
1024 }
1025 }
1026
/* KVM vCPU thread: sleep on the halt condition while this vCPU is idle,
 * then consume pending signals and do the common housekeeping. */
static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}
1036
1037 static void *qemu_kvm_cpu_thread_fn(void *arg)
1038 {
1039 CPUState *cpu = arg;
1040 int r;
1041
1042 rcu_register_thread();
1043
1044 qemu_mutex_lock_iothread();
1045 qemu_thread_get_self(cpu->thread);
1046 cpu->thread_id = qemu_get_thread_id();
1047 cpu->can_do_io = 1;
1048 current_cpu = cpu;
1049
1050 r = kvm_init_vcpu(cpu);
1051 if (r < 0) {
1052 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1053 exit(1);
1054 }
1055
1056 qemu_kvm_init_cpu_signals(cpu);
1057
1058 /* signal CPU creation */
1059 cpu->created = true;
1060 qemu_cond_signal(&qemu_cpu_cond);
1061
1062 while (1) {
1063 if (cpu_can_run(cpu)) {
1064 r = kvm_cpu_exec(cpu);
1065 if (r == EXCP_DEBUG) {
1066 cpu_handle_guest_debug(cpu);
1067 }
1068 }
1069 qemu_kvm_wait_io_event(cpu);
1070 }
1071
1072 return NULL;
1073 }
1074
1075 static void *qemu_dummy_cpu_thread_fn(void *arg)
1076 {
1077 #ifdef _WIN32
1078 fprintf(stderr, "qtest is not supported under Windows\n");
1079 exit(1);
1080 #else
1081 CPUState *cpu = arg;
1082 sigset_t waitset;
1083 int r;
1084
1085 rcu_register_thread();
1086
1087 qemu_mutex_lock_iothread();
1088 qemu_thread_get_self(cpu->thread);
1089 cpu->thread_id = qemu_get_thread_id();
1090 cpu->can_do_io = 1;
1091
1092 sigemptyset(&waitset);
1093 sigaddset(&waitset, SIG_IPI);
1094
1095 /* signal CPU creation */
1096 cpu->created = true;
1097 qemu_cond_signal(&qemu_cpu_cond);
1098
1099 current_cpu = cpu;
1100 while (1) {
1101 current_cpu = NULL;
1102 qemu_mutex_unlock_iothread();
1103 do {
1104 int sig;
1105 r = sigwait(&waitset, &sig);
1106 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1107 if (r == -1) {
1108 perror("sigwait");
1109 exit(1);
1110 }
1111 qemu_mutex_lock_iothread();
1112 current_cpu = cpu;
1113 qemu_wait_io_event_common(cpu);
1114 }
1115
1116 return NULL;
1117 #endif
1118 }
1119
1120 static void tcg_exec_all(void);
1121
1122 static void *qemu_tcg_cpu_thread_fn(void *arg)
1123 {
1124 CPUState *cpu = arg;
1125
1126 rcu_register_thread();
1127
1128 qemu_mutex_lock_iothread();
1129 qemu_thread_get_self(cpu->thread);
1130
1131 CPU_FOREACH(cpu) {
1132 cpu->thread_id = qemu_get_thread_id();
1133 cpu->created = true;
1134 cpu->can_do_io = 1;
1135 }
1136 qemu_cond_signal(&qemu_cpu_cond);
1137
1138 /* wait for initial kick-off after machine start */
1139 while (first_cpu->stopped) {
1140 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1141
1142 /* process any pending work */
1143 CPU_FOREACH(cpu) {
1144 qemu_wait_io_event_common(cpu);
1145 }
1146 }
1147
1148 /* process any pending work */
1149 atomic_mb_set(&exit_request, 1);
1150
1151 while (1) {
1152 tcg_exec_all();
1153
1154 if (use_icount) {
1155 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1156
1157 if (deadline == 0) {
1158 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1159 }
1160 }
1161 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1162 }
1163
1164 return NULL;
1165 }
1166
1167 static void qemu_cpu_kick_thread(CPUState *cpu)
1168 {
1169 #ifndef _WIN32
1170 int err;
1171
1172 if (cpu->thread_kicked) {
1173 return;
1174 }
1175 cpu->thread_kicked = true;
1176 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1177 if (err) {
1178 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1179 exit(1);
1180 }
1181 #else /* _WIN32 */
1182 abort();
1183 #endif
1184 }
1185
1186 static void qemu_cpu_kick_no_halt(void)
1187 {
1188 CPUState *cpu;
1189 /* Ensure whatever caused the exit has reached the CPU threads before
1190 * writing exit_request.
1191 */
1192 atomic_mb_set(&exit_request, 1);
1193 cpu = atomic_mb_read(&tcg_current_cpu);
1194 if (cpu) {
1195 cpu_exit(cpu);
1196 }
1197 }
1198
1199 void qemu_cpu_kick(CPUState *cpu)
1200 {
1201 qemu_cond_broadcast(cpu->halt_cond);
1202 if (tcg_enabled()) {
1203 qemu_cpu_kick_no_halt();
1204 } else {
1205 qemu_cpu_kick_thread(cpu);
1206 }
1207 }
1208
/* Kick the calling thread's own vCPU. Must be called with current_cpu
 * set (asserted). */
void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}
1214
/* True when the calling thread is the thread recorded in @cpu->thread. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1219
1220 bool qemu_in_vcpu_thread(void)
1221 {
1222 return current_cpu && qemu_cpu_is_self(current_cpu);
1223 }
1224
/*
 * Per-thread flag: set by qemu_mutex_lock_iothread() once the global mutex
 * has been taken and cleared by qemu_mutex_unlock_iothread().  Static
 * storage starts out zeroed, i.e. false.
 */
static __thread bool iothread_locked;

/* Tell whether the calling thread currently has iothread_locked set. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1231
1232 void qemu_mutex_lock_iothread(void)
1233 {
1234 atomic_inc(&iothread_requesting_mutex);
1235 /* In the simple case there is no need to bump the VCPU thread out of
1236 * TCG code execution.
1237 */
1238 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1239 !first_cpu || !first_cpu->created) {
1240 qemu_mutex_lock(&qemu_global_mutex);
1241 atomic_dec(&iothread_requesting_mutex);
1242 } else {
1243 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1244 qemu_cpu_kick_no_halt();
1245 qemu_mutex_lock(&qemu_global_mutex);
1246 }
1247 atomic_dec(&iothread_requesting_mutex);
1248 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1249 }
1250 iothread_locked = true;
1251 }
1252
1253 void qemu_mutex_unlock_iothread(void)
1254 {
1255 iothread_locked = false;
1256 qemu_mutex_unlock(&qemu_global_mutex);
1257 }
1258
1259 static int all_vcpus_paused(void)
1260 {
1261 CPUState *cpu;
1262
1263 CPU_FOREACH(cpu) {
1264 if (!cpu->stopped) {
1265 return 0;
1266 }
1267 }
1268
1269 return 1;
1270 }
1271
1272 void pause_all_vcpus(void)
1273 {
1274 CPUState *cpu;
1275
1276 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1277 CPU_FOREACH(cpu) {
1278 cpu->stop = true;
1279 qemu_cpu_kick(cpu);
1280 }
1281
1282 if (qemu_in_vcpu_thread()) {
1283 cpu_stop_current();
1284 if (!kvm_enabled()) {
1285 CPU_FOREACH(cpu) {
1286 cpu->stop = false;
1287 cpu->stopped = true;
1288 }
1289 return;
1290 }
1291 }
1292
1293 while (!all_vcpus_paused()) {
1294 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1295 CPU_FOREACH(cpu) {
1296 qemu_cpu_kick(cpu);
1297 }
1298 }
1299 }
1300
1301 void cpu_resume(CPUState *cpu)
1302 {
1303 cpu->stop = false;
1304 cpu->stopped = false;
1305 qemu_cpu_kick(cpu);
1306 }
1307
1308 void resume_all_vcpus(void)
1309 {
1310 CPUState *cpu;
1311
1312 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1313 CPU_FOREACH(cpu) {
1314 cpu_resume(cpu);
1315 }
1316 }
1317
/* Size of the temporary buffers used to build a vCPU thread name */
1319 #define VCPU_THREAD_NAME_SIZE 16
1320
1321 static void qemu_tcg_init_vcpu(CPUState *cpu)
1322 {
1323 char thread_name[VCPU_THREAD_NAME_SIZE];
1324 static QemuCond *tcg_halt_cond;
1325 static QemuThread *tcg_cpu_thread;
1326
1327 /* share a single thread for all cpus with TCG */
1328 if (!tcg_cpu_thread) {
1329 cpu->thread = g_malloc0(sizeof(QemuThread));
1330 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1331 qemu_cond_init(cpu->halt_cond);
1332 tcg_halt_cond = cpu->halt_cond;
1333 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1334 cpu->cpu_index);
1335 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1336 cpu, QEMU_THREAD_JOINABLE);
1337 #ifdef _WIN32
1338 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1339 #endif
1340 while (!cpu->created) {
1341 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1342 }
1343 tcg_cpu_thread = cpu->thread;
1344 } else {
1345 cpu->thread = tcg_cpu_thread;
1346 cpu->halt_cond = tcg_halt_cond;
1347 }
1348 }
1349
1350 static void qemu_kvm_start_vcpu(CPUState *cpu)
1351 {
1352 char thread_name[VCPU_THREAD_NAME_SIZE];
1353
1354 cpu->thread = g_malloc0(sizeof(QemuThread));
1355 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1356 qemu_cond_init(cpu->halt_cond);
1357 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1358 cpu->cpu_index);
1359 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1360 cpu, QEMU_THREAD_JOINABLE);
1361 while (!cpu->created) {
1362 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1363 }
1364 }
1365
1366 static void qemu_dummy_start_vcpu(CPUState *cpu)
1367 {
1368 char thread_name[VCPU_THREAD_NAME_SIZE];
1369
1370 cpu->thread = g_malloc0(sizeof(QemuThread));
1371 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1372 qemu_cond_init(cpu->halt_cond);
1373 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1374 cpu->cpu_index);
1375 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1376 QEMU_THREAD_JOINABLE);
1377 while (!cpu->created) {
1378 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1379 }
1380 }
1381
1382 void qemu_init_vcpu(CPUState *cpu)
1383 {
1384 cpu->nr_cores = smp_cores;
1385 cpu->nr_threads = smp_threads;
1386 cpu->stopped = true;
1387
1388 if (!cpu->as) {
1389 /* If the target cpu hasn't set up any address spaces itself,
1390 * give it the default one.
1391 */
1392 AddressSpace *as = address_space_init_shareable(cpu->memory,
1393 "cpu-memory");
1394 cpu->num_ases = 1;
1395 cpu_address_space_init(cpu, as, 0);
1396 }
1397
1398 if (kvm_enabled()) {
1399 qemu_kvm_start_vcpu(cpu);
1400 } else if (tcg_enabled()) {
1401 qemu_tcg_init_vcpu(cpu);
1402 } else {
1403 qemu_dummy_start_vcpu(cpu);
1404 }
1405 }
1406
1407 void cpu_stop_current(void)
1408 {
1409 if (current_cpu) {
1410 current_cpu->stop = false;
1411 current_cpu->stopped = true;
1412 cpu_exit(current_cpu);
1413 qemu_cond_broadcast(&qemu_pause_cond);
1414 }
1415 }
1416
1417 int vm_stop(RunState state)
1418 {
1419 if (qemu_in_vcpu_thread()) {
1420 qemu_system_vmstop_request_prepare();
1421 qemu_system_vmstop_request(state);
1422 /*
1423 * FIXME: should not return to device code in case
1424 * vm_stop() has been requested.
1425 */
1426 cpu_stop_current();
1427 return 0;
1428 }
1429
1430 return do_vm_stop(state);
1431 }
1432
1433 /* does a state transition even if the VM is already stopped,
1434 current state is forgotten forever */
1435 int vm_stop_force_state(RunState state)
1436 {
1437 if (runstate_is_running()) {
1438 return vm_stop(state);
1439 } else {
1440 runstate_set(state);
1441
1442 bdrv_drain_all();
1443 /* Make sure to return an error if the flush in a previous vm_stop()
1444 * failed. */
1445 return blk_flush_all();
1446 }
1447 }
1448
1449 static int64_t tcg_get_icount_limit(void)
1450 {
1451 int64_t deadline;
1452
1453 if (replay_mode != REPLAY_MODE_PLAY) {
1454 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1455
1456 /* Maintain prior (possibly buggy) behaviour where if no deadline
1457 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1458 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1459 * nanoseconds.
1460 */
1461 if ((deadline < 0) || (deadline > INT32_MAX)) {
1462 deadline = INT32_MAX;
1463 }
1464
1465 return qemu_icount_round(deadline);
1466 } else {
1467 return replay_get_instructions();
1468 }
1469 }
1470
1471 static int tcg_cpu_exec(CPUState *cpu)
1472 {
1473 int ret;
1474 #ifdef CONFIG_PROFILER
1475 int64_t ti;
1476 #endif
1477
1478 #ifdef CONFIG_PROFILER
1479 ti = profile_getclock();
1480 #endif
1481 if (use_icount) {
1482 int64_t count;
1483 int decr;
1484 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1485 + cpu->icount_extra);
1486 cpu->icount_decr.u16.low = 0;
1487 cpu->icount_extra = 0;
1488 count = tcg_get_icount_limit();
1489 timers_state.qemu_icount += count;
1490 decr = (count > 0xffff) ? 0xffff : count;
1491 count -= decr;
1492 cpu->icount_decr.u16.low = decr;
1493 cpu->icount_extra = count;
1494 }
1495 ret = cpu_exec(cpu);
1496 #ifdef CONFIG_PROFILER
1497 tcg_time += profile_getclock() - ti;
1498 #endif
1499 if (use_icount) {
1500 /* Fold pending instructions back into the
1501 instruction counter, and clear the interrupt flag. */
1502 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1503 + cpu->icount_extra);
1504 cpu->icount_decr.u32 = 0;
1505 cpu->icount_extra = 0;
1506 replay_account_executed_instructions();
1507 }
1508 return ret;
1509 }
1510
1511 static void tcg_exec_all(void)
1512 {
1513 int r;
1514
1515 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1516 qemu_account_warp_timer();
1517
1518 if (next_cpu == NULL) {
1519 next_cpu = first_cpu;
1520 }
1521 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1522 CPUState *cpu = next_cpu;
1523
1524 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1525 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1526
1527 if (cpu_can_run(cpu)) {
1528 r = tcg_cpu_exec(cpu);
1529 if (r == EXCP_DEBUG) {
1530 cpu_handle_guest_debug(cpu);
1531 break;
1532 }
1533 } else if (cpu->stop || cpu->stopped) {
1534 break;
1535 }
1536 }
1537
1538 /* Pairs with smp_wmb in qemu_cpu_kick. */
1539 atomic_mb_set(&exit_request, 0);
1540 }
1541
1542 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1543 {
1544 /* XXX: implement xxx_cpu_list for targets that still miss it */
1545 #if defined(cpu_list)
1546 cpu_list(f, cpu_fprintf);
1547 #endif
1548 }
1549
1550 CpuInfoList *qmp_query_cpus(Error **errp)
1551 {
1552 CpuInfoList *head = NULL, *cur_item = NULL;
1553 CPUState *cpu;
1554
1555 CPU_FOREACH(cpu) {
1556 CpuInfoList *info;
1557 #if defined(TARGET_I386)
1558 X86CPU *x86_cpu = X86_CPU(cpu);
1559 CPUX86State *env = &x86_cpu->env;
1560 #elif defined(TARGET_PPC)
1561 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1562 CPUPPCState *env = &ppc_cpu->env;
1563 #elif defined(TARGET_SPARC)
1564 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1565 CPUSPARCState *env = &sparc_cpu->env;
1566 #elif defined(TARGET_MIPS)
1567 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1568 CPUMIPSState *env = &mips_cpu->env;
1569 #elif defined(TARGET_TRICORE)
1570 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1571 CPUTriCoreState *env = &tricore_cpu->env;
1572 #endif
1573
1574 cpu_synchronize_state(cpu);
1575
1576 info = g_malloc0(sizeof(*info));
1577 info->value = g_malloc0(sizeof(*info->value));
1578 info->value->CPU = cpu->cpu_index;
1579 info->value->current = (cpu == first_cpu);
1580 info->value->halted = cpu->halted;
1581 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1582 info->value->thread_id = cpu->thread_id;
1583 #if defined(TARGET_I386)
1584 info->value->arch = CPU_INFO_ARCH_X86;
1585 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1586 #elif defined(TARGET_PPC)
1587 info->value->arch = CPU_INFO_ARCH_PPC;
1588 info->value->u.ppc.nip = env->nip;
1589 #elif defined(TARGET_SPARC)
1590 info->value->arch = CPU_INFO_ARCH_SPARC;
1591 info->value->u.q_sparc.pc = env->pc;
1592 info->value->u.q_sparc.npc = env->npc;
1593 #elif defined(TARGET_MIPS)
1594 info->value->arch = CPU_INFO_ARCH_MIPS;
1595 info->value->u.q_mips.PC = env->active_tc.PC;
1596 #elif defined(TARGET_TRICORE)
1597 info->value->arch = CPU_INFO_ARCH_TRICORE;
1598 info->value->u.tricore.PC = env->PC;
1599 #else
1600 info->value->arch = CPU_INFO_ARCH_OTHER;
1601 #endif
1602
1603 /* XXX: waiting for the qapi to support GSList */
1604 if (!cur_item) {
1605 head = cur_item = info;
1606 } else {
1607 cur_item->next = info;
1608 cur_item = info;
1609 }
1610 }
1611
1612 return head;
1613 }
1614
1615 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1616 bool has_cpu, int64_t cpu_index, Error **errp)
1617 {
1618 FILE *f;
1619 uint32_t l;
1620 CPUState *cpu;
1621 uint8_t buf[1024];
1622 int64_t orig_addr = addr, orig_size = size;
1623
1624 if (!has_cpu) {
1625 cpu_index = 0;
1626 }
1627
1628 cpu = qemu_get_cpu(cpu_index);
1629 if (cpu == NULL) {
1630 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1631 "a CPU number");
1632 return;
1633 }
1634
1635 f = fopen(filename, "wb");
1636 if (!f) {
1637 error_setg_file_open(errp, errno, filename);
1638 return;
1639 }
1640
1641 while (size != 0) {
1642 l = sizeof(buf);
1643 if (l > size)
1644 l = size;
1645 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1646 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1647 " specified", orig_addr, orig_size);
1648 goto exit;
1649 }
1650 if (fwrite(buf, 1, l, f) != l) {
1651 error_setg(errp, QERR_IO_ERROR);
1652 goto exit;
1653 }
1654 addr += l;
1655 size -= l;
1656 }
1657
1658 exit:
1659 fclose(f);
1660 }
1661
1662 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1663 Error **errp)
1664 {
1665 FILE *f;
1666 uint32_t l;
1667 uint8_t buf[1024];
1668
1669 f = fopen(filename, "wb");
1670 if (!f) {
1671 error_setg_file_open(errp, errno, filename);
1672 return;
1673 }
1674
1675 while (size != 0) {
1676 l = sizeof(buf);
1677 if (l > size)
1678 l = size;
1679 cpu_physical_memory_read(addr, buf, l);
1680 if (fwrite(buf, 1, l, f) != l) {
1681 error_setg(errp, QERR_IO_ERROR);
1682 goto exit;
1683 }
1684 addr += l;
1685 size -= l;
1686 }
1687
1688 exit:
1689 fclose(f);
1690 }
1691
1692 void qmp_inject_nmi(Error **errp)
1693 {
1694 #if defined(TARGET_I386)
1695 CPUState *cs;
1696
1697 CPU_FOREACH(cs) {
1698 X86CPU *cpu = X86_CPU(cs);
1699
1700 if (!cpu->apic_state) {
1701 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1702 } else {
1703 apic_deliver_nmi(cpu->apic_state);
1704 }
1705 }
1706 #else
1707 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1708 #endif
1709 }
1710
1711 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1712 {
1713 if (!use_icount) {
1714 return;
1715 }
1716
1717 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1718 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1719 if (icount_align_option) {
1720 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1721 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1722 } else {
1723 cpu_fprintf(f, "Max guest delay NA\n");
1724 cpu_fprintf(f, "Max guest advance NA\n");
1725 }
1726 }