trap signals for "-serial mon:stdio"
[qemu.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
34
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
39
40 //#define DEBUG_KVM
41
42 #ifdef DEBUG_KVM
43 #define dprintf(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define dprintf(fmt, ...) \
47 do { } while (0)
48 #endif
49
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
/* Capabilities the host kernel must expose for PPC KVM to be usable at all.
 * Only the KVM_CAP_LAST_INFO terminator is present: there are no hard
 * requirements beyond base KVM support. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
55
/* Cache of optional host-kernel capabilities, probed once in kvm_arch_init()
 * via kvm_check_extension() and consulted throughout this file. */
static int cap_interrupt_unset = false; /* KVM_CAP_PPC_UNSET_IRQ */
static int cap_interrupt_level = false; /* KVM_CAP_PPC_IRQ_LEVEL */
static int cap_segstate;                /* KVM_CAP_PPC_SEGSTATE */
static int cap_booke_sregs;             /* KVM_CAP_PPC_BOOKE_SREGS */
static int cap_ppc_smt;                 /* KVM_CAP_PPC_SMT */
static int cap_ppc_rma;                 /* KVM_CAP_PPC_RMA */
static int cap_spapr_tce;               /* KVM_CAP_SPAPR_TCE */
static int cap_hior;                    /* KVM_CAP_PPC_HIOR */
static int cap_one_reg;                 /* KVM_CAP_ONE_REG */
static int cap_epr;                     /* KVM_CAP_PPC_EPR */
static int cap_ppc_watchdog;            /* KVM_CAP_PPC_BOOKE_WATCHDOG */
static int cap_papr;                    /* set later by kvmppc_set_papr() */

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
79
80 static void kvm_kick_cpu(void *opaque)
81 {
82 PowerPCCPU *cpu = opaque;
83
84 qemu_cpu_kick(CPU(cpu));
85 }
86
87 static int kvm_ppc_register_host_cpu_type(void);
88
/* One-time architecture init: probe and cache the host kernel's optional
 * PPC capabilities, warn about a missing level-irq capability (which makes
 * interrupt delivery lossy, see kvm_arch_pre_run), and register the host
 * CPU model.  Always returns 0. */
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
114
115 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
116 {
117 CPUPPCState *cenv = &cpu->env;
118 CPUState *cs = CPU(cpu);
119 struct kvm_sregs sregs;
120 int ret;
121
122 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
123 /* What we're really trying to say is "if we're on BookE, we use
124 the native PVR for now". This is the only sane way to check
125 it though, so we potentially confuse users that they can run
126 BookE guests on BookS. Let's hope nobody dares enough :) */
127 return 0;
128 } else {
129 if (!cap_segstate) {
130 fprintf(stderr, "kvm error: missing PVR setting capability\n");
131 return -ENOSYS;
132 }
133 }
134
135 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
136 if (ret) {
137 return ret;
138 }
139
140 sregs.pvr = cenv->spr[SPR_PVR];
141 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
142 }
143
/* Set up a shared TLB array with KVM.
 *
 * Describes each BookE 2.06 TLB's geometry to the kernel and hands it
 * QEMU's own TLB array (env->tlb.tlbm) as the shared backing store, so
 * both sides operate on the same entries.  Returns 0 on success (or if
 * the KVM_CAP_SW_TLB extension is absent, in which case sharing is
 * silently skipped), or a negative errno from KVM_ENABLE_CAP. */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    /* Report each TLB's size and associativity; total the entry count. */
    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    /* Sharing only works if both sides use the same entry layout. */
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
191
192
193 #if defined(TARGET_PPC64)
/* Build a best-guess MMU description for kernels that predate the
 * KVM_PPC_GET_SMMU_INFO ioctl (see the rationale in the body comment).
 * Fills *info in place; never fails. */
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteritics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows supports for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
280
281 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
282 {
283 CPUState *cs = CPU(cpu);
284 int ret;
285
286 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
287 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
288 if (ret == 0) {
289 return;
290 }
291 }
292
293 kvm_get_fallback_smmu_info(cpu, info);
294 }
295
296 static long getrampagesize(void)
297 {
298 struct statfs fs;
299 int ret;
300
301 if (!mem_path) {
302 /* guest RAM is backed by normal anonymous pages */
303 return getpagesize();
304 }
305
306 do {
307 ret = statfs(mem_path, &fs);
308 } while (ret != 0 && errno == EINTR);
309
310 if (ret != 0) {
311 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
312 strerror(errno));
313 exit(1);
314 }
315
316 #define HUGETLBFS_MAGIC 0x958458f6
317
318 if (fs.f_type != HUGETLBFS_MAGIC) {
319 /* Explicit mempath, but it's ordinary pages */
320 return getpagesize();
321 }
322
323 /* It's hugepage, return the huge page size */
324 return fs.f_bsize;
325 }
326
327 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
328 {
329 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
330 return true;
331 }
332
333 return (1ul << shift) <= rampgsize;
334 }
335
/* Rebuild env->sps (the supported segment/page-size table) from the host
 * MMU description, dropping entries the RAM backing store cannot honour,
 * and mirror the SLB size and 1T-segment capability into env. */
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    /* The host MMU description is global, so query it once and cache it. */
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* ik/jk walk the kernel tables; iq/jq fill the (possibly sparser)
     * QEMU tables, skipping sizes rejected by kvm_valid_page_size(). */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
392 #else /* defined (TARGET_PPC64) */
393
/* 32-bit targets: page-size fixup only applies to 64-bit server MMUs. */
static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}
397
398 #endif /* !defined (TARGET_PPC64) */
399
/* KVM vcpu ids map 1:1 onto QEMU's cpu_index on PPC. */
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}
404
405 int kvm_arch_init_vcpu(CPUState *cs)
406 {
407 PowerPCCPU *cpu = POWERPC_CPU(cs);
408 CPUPPCState *cenv = &cpu->env;
409 int ret;
410
411 /* Gather server mmu info from KVM and update the CPU state */
412 kvm_fixup_page_sizes(cpu);
413
414 /* Synchronize sregs with kvm */
415 ret = kvm_arch_sync_sregs(cpu);
416 if (ret) {
417 return ret;
418 }
419
420 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
421
422 /* Some targets support access to KVM's guest TLB. */
423 switch (cenv->mmu_model) {
424 case POWERPC_MMU_BOOKE206:
425 ret = kvm_booke206_tlb_init(cpu);
426 break;
427 default:
428 break;
429 }
430
431 return ret;
432 }
433
/* Nothing to do on vcpu reset for PPC KVM. */
void kvm_arch_reset_vcpu(CPUState *cpu)
{
}
437
438 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
439 {
440 CPUPPCState *env = &cpu->env;
441 CPUState *cs = CPU(cpu);
442 struct kvm_dirty_tlb dirty_tlb;
443 unsigned char *bitmap;
444 int ret;
445
446 if (!env->kvm_sw_tlb) {
447 return;
448 }
449
450 bitmap = g_malloc((env->nb_tlb + 7) / 8);
451 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
452
453 dirty_tlb.bitmap = (uintptr_t)bitmap;
454 dirty_tlb.num_dirty = env->nb_tlb;
455
456 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
457 if (ret) {
458 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
459 __func__, strerror(-ret));
460 }
461
462 g_free(bitmap);
463 }
464
465 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
466 {
467 PowerPCCPU *cpu = POWERPC_CPU(cs);
468 CPUPPCState *env = &cpu->env;
469 union {
470 uint32_t u32;
471 uint64_t u64;
472 } val;
473 struct kvm_one_reg reg = {
474 .id = id,
475 .addr = (uintptr_t) &val,
476 };
477 int ret;
478
479 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
480 if (ret != 0) {
481 fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
482 spr, strerror(errno));
483 } else {
484 switch (id & KVM_REG_SIZE_MASK) {
485 case KVM_REG_SIZE_U32:
486 env->spr[spr] = val.u32;
487 break;
488
489 case KVM_REG_SIZE_U64:
490 env->spr[spr] = val.u64;
491 break;
492
493 default:
494 /* Don't handle this size yet */
495 abort();
496 }
497 }
498 }
499
500 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
501 {
502 PowerPCCPU *cpu = POWERPC_CPU(cs);
503 CPUPPCState *env = &cpu->env;
504 union {
505 uint32_t u32;
506 uint64_t u64;
507 } val;
508 struct kvm_one_reg reg = {
509 .id = id,
510 .addr = (uintptr_t) &val,
511 };
512 int ret;
513
514 switch (id & KVM_REG_SIZE_MASK) {
515 case KVM_REG_SIZE_U32:
516 val.u32 = env->spr[spr];
517 break;
518
519 case KVM_REG_SIZE_U64:
520 val.u64 = env->spr[spr];
521 break;
522
523 default:
524 /* Don't handle this size yet */
525 abort();
526 }
527
528 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
529 if (ret != 0) {
530 fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
531 spr, strerror(errno));
532 }
533 }
534
/* Push FP/VSX and Altivec state from env into KVM via ONE_REG.
 * Returns 0, or the first failing ioctl's negative return value. */
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            /* VSR layout: doubleword 0 is the FPR, doubleword 1 is the
             * extension held in env->vsr.  Without VSX, only the FPR id
             * is used so just doubleword 0 is transferred. */
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
594
/* Fetch FP/VSX and Altivec state from KVM into env via ONE_REG.
 * Mirror image of kvm_put_fp(); returns 0 or the first failing ioctl's
 * negative return value. */
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                /* Doubleword 0 is the FPR; doubleword 1 only exists
                 * when the VSR form was transferred. */
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
660
661 #if defined(TARGET_PPC64)
/* Read the PAPR per-vCPU area registrations (VPA address, SLB shadow,
 * dispatch trace log) from KVM into env.  PPC64/sPAPR only.
 * Returns 0 or the first failing ioctl's negative return value. */
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    /* The addr/size pair is transferred through a single register whose
     * buffer starts at the addr field -- the assert guards the required
     * adjacent layout in CPUPPCState. */
    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    /* Same layout requirement for the dispatch trace log pair. */
    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
700
/* Write the PAPR per-vCPU area registrations back to KVM, in an order
 * that respects the master-VPA dependency described below.
 * Returns 0 or the first failing ioctl's negative return value. */
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    /* Register the master VPA first when one exists... */
    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    /* ...then the SLB shadow and DTL pairs (see kvm_get_vpa for the
     * adjacent addr/size layout these asserts guard). */
    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    /* Deregister the master VPA last when there is none. */
    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
756 #endif /* TARGET_PPC64 */
757
/* Push QEMU's vcpu state into KVM.  Core regs and FP/VMX are always
 * written; sregs and HIOR only at reset-level syncs when the capability
 * exists; ONE_REG-described SPRs (and VPA state under PAPR) whenever
 * KVM_CAP_ONE_REG is present.  Returns 0 or a negative errno. */
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    /* Read-modify-write so regs fields QEMU doesn't track are preserved. */
    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0;i < 32; i++)
        regs.gpr[i] = env->gpr[i];

    /* Pack the eight 4-bit CR fields into the single 32-bit CR image. */
    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0)
        return ret;

    kvm_put_fp(cs);

    /* Flush a pending shared software-TLB image (BookE 2.06 mode). */
    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;
        /* NOTE(review): sregs is not zero-initialized, so fields not
         * assigned below reach the kernel as stack garbage -- presumably
         * KVM_SET_SREGS ignores them on book3s; confirm. */

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                dprintf("Warning: Unable to set VPA information to KVM\n");
            }
        }
#endif /* TARGET_PPC64 */
    }

    return ret;
}
875
/* Pull the complete vcpu state out of KVM into env: core regs, FP/VMX,
 * then capability-dependent state -- BookE sregs feature groups, book3s
 * segment state (SDR1/SLB/SRs/BATs), HIOR, and all ONE_REG-described
 * SPRs plus PAPR VPA state.  Returns 0 or the first mandatory ioctl's
 * negative errno. */
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0)
        return ret;

    /* Unpack the 32-bit CR image into the eight 4-bit CR fields. */
    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0;i < 32; i++)
        env->gpr[i] = regs.gpr[i];

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        /* Each KVM_SREGS_E_* feature bit guards one group of BookE
         * registers; only groups the kernel reports are read back. */
        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            /* Split the 64-bit timebase into its 32-bit halves. */
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            /* mas7_3 packs MAS7 (high word) and MAS3 (low word). */
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                          sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Upper/lower halves are packed into one 64-bit value. */
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                dprintf("Warning: Unable to get VPA information from KVM\n");
            }
        }
#endif
    }

    return 0;
}
1084
1085 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1086 {
1087 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1088
1089 if (irq != PPC_INTERRUPT_EXT) {
1090 return 0;
1091 }
1092
1093 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1094 return 0;
1095 }
1096
1097 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1098
1099 return 0;
1100 }
1101
/* Core input pin carrying the external interrupt, per CPU family; used by
 * kvm_arch_pre_run() to test env->irq_input_state. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1109
/* Called before each KVM_RUN.  On kernels without the level-irq capability,
 * inject a pending external interrupt by hand and re-arm the 20ms wakeup
 * timer that papers over the lost-level-interrupt race (see idle_timer). */
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}
1145
/* Nothing to do after KVM_RUN returns on PPC. */
void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}
1149
/* No PPC-specific async events; report only whether the vcpu is halted
 * so the generic loop keeps it out of KVM_RUN. */
int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
1154
1155 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1156 {
1157 CPUState *cs = CPU(cpu);
1158 CPUPPCState *env = &cpu->env;
1159
1160 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1161 cs->halted = 1;
1162 env->exception_index = EXCP_HLT;
1163 }
1164
1165 return 0;
1166 }
1167
1168 /* map dcr access to existing qemu dcr emulation */
1169 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1170 {
1171 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1172 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1173
1174 return 0;
1175 }
1176
1177 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1178 {
1179 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1180 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1181
1182 return 0;
1183 }
1184
/* Dispatch a KVM exit into the matching QEMU emulation path.  Returns 0 to
 * resume the guest, negative for exits QEMU does not recognise. */
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        /* Guest touched a Device Control Register; emulate it in QEMU. */
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        /* sPAPR hypercall; the result is passed back via papr_hcall.ret. */
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        /* MPIC EPR proxy: fetch the interrupt acknowledge value for KVM. */
        dprintf("handle epr\n");
        run->epr.epr = ldl_phys(env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        /* In-kernel booke watchdog expired; apply the configured action. */
        dprintf("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
1233
1234 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1235 {
1236 CPUState *cs = CPU(cpu);
1237 uint32_t bits = tsr_bits;
1238 struct kvm_one_reg reg = {
1239 .id = KVM_REG_PPC_OR_TSR,
1240 .addr = (uintptr_t) &bits,
1241 };
1242
1243 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1244 }
1245
1246 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1247 {
1248
1249 CPUState *cs = CPU(cpu);
1250 uint32_t bits = tsr_bits;
1251 struct kvm_one_reg reg = {
1252 .id = KVM_REG_PPC_CLEAR_TSR,
1253 .addr = (uintptr_t) &bits,
1254 };
1255
1256 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1257 }
1258
1259 int kvmppc_set_tcr(PowerPCCPU *cpu)
1260 {
1261 CPUState *cs = CPU(cpu);
1262 CPUPPCState *env = &cpu->env;
1263 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1264
1265 struct kvm_one_reg reg = {
1266 .id = KVM_REG_PPC_TCR,
1267 .addr = (uintptr_t) &tcr,
1268 };
1269
1270 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1271 }
1272
1273 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1274 {
1275 CPUState *cs = CPU(cpu);
1276 struct kvm_enable_cap encap = {};
1277 int ret;
1278
1279 if (!kvm_enabled()) {
1280 return -1;
1281 }
1282
1283 if (!cap_ppc_watchdog) {
1284 printf("warning: KVM does not support watchdog");
1285 return -1;
1286 }
1287
1288 encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1289 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1290 if (ret < 0) {
1291 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1292 __func__, strerror(-ret));
1293 return ret;
1294 }
1295
1296 return ret;
1297 }
1298
/* Scan /proc/cpuinfo for a line starting with 'field' and copy that whole
 * line into 'value' (at most 'len' bytes).  Returns 0 on a match, -1 if
 * the file can't be opened or the field is absent. */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
1326
/* Parse the host timebase frequency out of the "timebase : <n>" line in
 * /proc/cpuinfo; fall back to the emulated tick rate when the field is
 * missing or malformed. */
uint32_t kvmppc_get_tbfreq(void)
{
    uint32_t fallback = get_ticks_per_sec();
    char line[512];
    char *colon;

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return fallback;
    }

    colon = strchr(line, ':');
    if (!colon) {
        return fallback;
    }

    return atoi(colon + 1);
}
1346
1347 /* Try to find a device tree node for a CPU with clock-frequency property */
1348 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1349 {
1350 struct dirent *dirp;
1351 DIR *dp;
1352
1353 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1354 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1355 return -1;
1356 }
1357
1358 buf[0] = '\0';
1359 while ((dirp = readdir(dp)) != NULL) {
1360 FILE *f;
1361 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1362 dirp->d_name);
1363 f = fopen(buf, "r");
1364 if (f) {
1365 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1366 fclose(f);
1367 break;
1368 }
1369 buf[0] = '\0';
1370 }
1371 closedir(dp);
1372 if (buf[0] == '\0') {
1373 printf("Unknown host!\n");
1374 return -1;
1375 }
1376
1377 return 0;
1378 }
1379
1380 /* Read a CPU node property from the host device tree that's a single
1381 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1382 * (can't find or open the property, or doesn't understand the
1383 * format) */
1384 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1385 {
1386 char buf[PATH_MAX];
1387 union {
1388 uint32_t v32;
1389 uint64_t v64;
1390 } u;
1391 FILE *f;
1392 int len;
1393
1394 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1395 return -1;
1396 }
1397
1398 strncat(buf, "/", sizeof(buf) - strlen(buf));
1399 strncat(buf, propname, sizeof(buf) - strlen(buf));
1400
1401 f = fopen(buf, "rb");
1402 if (!f) {
1403 return -1;
1404 }
1405
1406 len = fread(&u, 1, sizeof(u), f);
1407 fclose(f);
1408 switch (len) {
1409 case 4:
1410 /* property is a 32-bit quantity */
1411 return be32_to_cpu(u.v32);
1412 case 8:
1413 return be64_to_cpu(u.v64);
1414 }
1415
1416 return 0;
1417 }
1418
/* Host CPU clock frequency from the device tree (see
 * kvmppc_read_int_cpu_dt for the failure encoding). */
uint64_t kvmppc_get_clockfreq(void)
{
    const char *prop = "clock-frequency";

    return kvmppc_read_int_cpu_dt(prop);
}
1423
/* Host "ibm,vmx" device tree property (Altivec/VSX level). */
uint32_t kvmppc_get_vmx(void)
{
    const char *prop = "ibm,vmx";

    return kvmppc_read_int_cpu_dt(prop);
}
1428
/* Host "ibm,dfp" device tree property (decimal floating point support). */
uint32_t kvmppc_get_dfp(void)
{
    const char *prop = "ibm,dfp";

    return kvmppc_read_int_cpu_dt(prop);
}
1433
1434 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1435 {
1436 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1437 CPUState *cs = CPU(cpu);
1438
1439 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1440 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1441 return 0;
1442 }
1443
1444 return 1;
1445 }
1446
1447 int kvmppc_get_hasidle(CPUPPCState *env)
1448 {
1449 struct kvm_ppc_pvinfo pvinfo;
1450
1451 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1452 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1453 return 1;
1454 }
1455
1456 return 0;
1457 }
1458
1459 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1460 {
1461 uint32_t *hc = (uint32_t*)buf;
1462 struct kvm_ppc_pvinfo pvinfo;
1463
1464 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1465 memcpy(buf, pvinfo.hcall, buf_len);
1466 return 0;
1467 }
1468
1469 /*
1470 * Fallback to always fail hypercalls:
1471 *
1472 * li r3, -1
1473 * nop
1474 * nop
1475 * nop
1476 */
1477
1478 hc[0] = 0x3860ffff;
1479 hc[1] = 0x60000000;
1480 hc[2] = 0x60000000;
1481 hc[3] = 0x60000000;
1482
1483 return 0;
1484 }
1485
1486 void kvmppc_set_papr(PowerPCCPU *cpu)
1487 {
1488 CPUPPCState *env = &cpu->env;
1489 CPUState *cs = CPU(cpu);
1490 struct kvm_enable_cap cap = {};
1491 int ret;
1492
1493 cap.cap = KVM_CAP_PPC_PAPR;
1494 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1495
1496 if (ret) {
1497 cpu_abort(env, "This KVM version does not support PAPR\n");
1498 }
1499
1500 /* Update the capability flag so we sync the right information
1501 * with kvm */
1502 cap_papr = 1;
1503 }
1504
1505 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1506 {
1507 CPUPPCState *env = &cpu->env;
1508 CPUState *cs = CPU(cpu);
1509 struct kvm_enable_cap cap = {};
1510 int ret;
1511
1512 cap.cap = KVM_CAP_PPC_EPR;
1513 cap.args[0] = mpic_proxy;
1514 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1515
1516 if (ret && mpic_proxy) {
1517 cpu_abort(env, "This KVM version does not support EPR\n");
1518 }
1519 }
1520
1521 int kvmppc_smt_threads(void)
1522 {
1523 return cap_ppc_smt ? cap_ppc_smt : 1;
1524 }
1525
1526 #ifdef TARGET_PPC64
1527 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1528 {
1529 void *rma;
1530 off_t size;
1531 int fd;
1532 struct kvm_allocate_rma ret;
1533 MemoryRegion *rma_region;
1534
1535 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1536 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1537 * not necessary on this hardware
1538 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1539 *
1540 * FIXME: We should allow the user to force contiguous RMA
1541 * allocation in the cap_ppc_rma==1 case.
1542 */
1543 if (cap_ppc_rma < 2) {
1544 return 0;
1545 }
1546
1547 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1548 if (fd < 0) {
1549 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1550 strerror(errno));
1551 return -1;
1552 }
1553
1554 size = MIN(ret.rma_size, 256ul << 20);
1555
1556 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1557 if (rma == MAP_FAILED) {
1558 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1559 return -1;
1560 };
1561
1562 rma_region = g_new(MemoryRegion, 1);
1563 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1564 vmstate_register_ram_global(rma_region);
1565 memory_region_add_subregion(sysmem, 0, rma_region);
1566
1567 return size;
1568 }
1569
/* Clamp the requested RMA size to what the host MMU setup can support.
 * With cap_ppc_rma >= 2 the kernel-allocated RMA is used unchanged;
 * otherwise the limit is derived from the largest host page size that
 * fits the backing RAM page size and from hash_shift.  NOTE(review):
 * the "- 7" factor looks like the HTAB-to-RMA sizing ratio — confirm. */
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(ppc_env_get_cpu(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        /* Zero page_shift marks an unused slot in the kernel's table. */
        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
1602 #endif
1603
/* Create an in-kernel TCE (DMA translation) table for the given LIOBN and
 * mmap it into QEMU.  On success returns the mapped table and stores its
 * fd in *pfd; returns NULL (with *pfd == -1 unless mapping succeeded and
 * then failed later — here every failure leaves *pfd == -1) when the
 * capability is missing or any step fails. */
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    /* One TCE entry per IOMMU page in the DMA window. */
    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
1643
1644 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1645 {
1646 long len;
1647
1648 if (fd < 0) {
1649 return -1;
1650 }
1651
1652 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1653 if ((munmap(table, len) < 0) ||
1654 (close(fd) < 0)) {
1655 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1656 strerror(errno));
1657 /* Leak the table */
1658 }
1659
1660 return 0;
1661 }
1662
/* Ask the kernel to (re)allocate the guest hash page table.
 * Returns the htab shift the kernel settled on, 0 when QEMU must allocate
 * the htab itself (TCG, or PR KVM), or a negative errno. */
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        /* The ioctl updates 'shift' to the size actually allocated. */
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl(). Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls. For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already. Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}
1700
/* Read the host Processor Version Register via the PPC mfpvr instruction
 * (only compiles on a PowerPC host). */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
1709
/* Set (on == true) or clear (on == false) the given flag bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1718
static void kvmppc_host_cpu_initfn(Object *obj)
{
    /* The "host" CPU type only makes sense when running under KVM. */
    assert(kvm_enabled());
}
1723
/* Fill in the "host" CPU class with properties queried from the running
 * machine's device tree (Altivec/VSX level, DFP, cache sizes). */
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */

    /* kvmppc_read_int_cpu_dt() returns (uint64_t)-1 on failure; after
     * truncation to uint32_t these compare equal to -1 here. */
    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}
1752
1753 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1754 {
1755 CPUState *cs = CPU(cpu);
1756 int smt;
1757
1758 /* Adjust cpu index for SMT */
1759 smt = kvmppc_smt_threads();
1760 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1761 + (cs->cpu_index % smp_threads);
1762
1763 return 0;
1764 }
1765
1766 bool kvmppc_has_cap_epr(void)
1767 {
1768 return cap_epr;
1769 }
1770
1771 static int kvm_ppc_register_host_cpu_type(void)
1772 {
1773 TypeInfo type_info = {
1774 .name = TYPE_HOST_POWERPC_CPU,
1775 .instance_init = kvmppc_host_cpu_initfn,
1776 .class_init = kvmppc_host_cpu_class_init,
1777 };
1778 uint32_t host_pvr = mfpvr();
1779 PowerPCCPUClass *pvr_pcc;
1780
1781 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1782 if (pvr_pcc == NULL) {
1783 return -1;
1784 }
1785 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1786 type_register(&type_info);
1787 return 0;
1788 }
1789
1790
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    /* Always stop the VM when KVM fails to emulate an instruction. */
    return true;
}
1795
int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    /* SIGBUS/MCE in vcpu context is not handled on PPC; non-zero tells
     * common code the signal was not consumed here. */
    return 1;
}
1800
/* SIGBUS outside vcpu context is likewise not consumed on PPC; returning
 * non-zero leaves the error to common-code handling. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
1805
/* No architecture-specific IRQ routing setup is needed on PPC. */
void kvm_arch_init_irq_routing(KVMState *s)
{
}