cpu: Move halted and interrupt_request fields to CPUState
[qemu.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
22
23 #include <linux/kvm.h>
24
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
35
36 #include "hw/sysbus.h"
37 #include "hw/spapr.h"
38 #include "hw/spapr_vio.h"
39
/* Define DEBUG_KVM to route the dprintf() trace messages to stderr. */
/* #define DEBUG_KVM */

#ifndef DEBUG_KVM
/* Tracing off: the call expands to an empty statement, arguments unevaluated. */
#define dprintf(fmt, ...) \
    do { } while (0)
#else
/* Tracing on: format the message onto stderr. */
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif
49
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
51
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
54 };
55
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64 static int cap_one_reg;
65
66 /* XXX We have a race condition where we actually have a level triggered
67 * interrupt, but the infrastructure can't expose that yet, so the guest
68 * takes but ignores it, goes to sleep and never gets notified that there's
69 * still an interrupt pending.
70 *
71 * As a quick workaround, let's just wake up again 20 ms after we injected
72 * an interrupt. That way we can assure that we're always reinjecting
73 * interrupts in case the guest swallowed them.
74 */
75 static QEMUTimer *idle_timer;
76
77 static void kvm_kick_cpu(void *opaque)
78 {
79 PowerPCCPU *cpu = opaque;
80
81 qemu_cpu_kick(CPU(cpu));
82 }
83
84 static int kvm_ppc_register_host_cpu_type(void);
85
86 int kvm_arch_init(KVMState *s)
87 {
88 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
89 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
90 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
91 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
92 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
93 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
94 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
95 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
96 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
97
98 if (!cap_interrupt_level) {
99 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
100 "VM to stall at times!\n");
101 }
102
103 kvm_ppc_register_host_cpu_type();
104
105 return 0;
106 }
107
108 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
109 {
110 CPUPPCState *cenv = &cpu->env;
111 CPUState *cs = CPU(cpu);
112 struct kvm_sregs sregs;
113 int ret;
114
115 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
116 /* What we're really trying to say is "if we're on BookE, we use
117 the native PVR for now". This is the only sane way to check
118 it though, so we potentially confuse users that they can run
119 BookE guests on BookS. Let's hope nobody dares enough :) */
120 return 0;
121 } else {
122 if (!cap_segstate) {
123 fprintf(stderr, "kvm error: missing PVR setting capability\n");
124 return -ENOSYS;
125 }
126 }
127
128 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
129 if (ret) {
130 return ret;
131 }
132
133 sregs.pvr = cenv->spr[SPR_PVR];
134 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
135 }
136
137 /* Set up a shared TLB array with KVM */
138 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
139 {
140 CPUPPCState *env = &cpu->env;
141 CPUState *cs = CPU(cpu);
142 struct kvm_book3e_206_tlb_params params = {};
143 struct kvm_config_tlb cfg = {};
144 struct kvm_enable_cap encap = {};
145 unsigned int entries = 0;
146 int ret, i;
147
148 if (!kvm_enabled() ||
149 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
150 return 0;
151 }
152
153 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
154
155 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
156 params.tlb_sizes[i] = booke206_tlb_size(env, i);
157 params.tlb_ways[i] = booke206_tlb_ways(env, i);
158 entries += params.tlb_sizes[i];
159 }
160
161 assert(entries == env->nb_tlb);
162 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
163
164 env->tlb_dirty = true;
165
166 cfg.array = (uintptr_t)env->tlb.tlbm;
167 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
168 cfg.params = (uintptr_t)&params;
169 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
170
171 encap.cap = KVM_CAP_SW_TLB;
172 encap.args[0] = (uintptr_t)&cfg;
173
174 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
175 if (ret < 0) {
176 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
177 __func__, strerror(-ret));
178 return ret;
179 }
180
181 env->kvm_sw_tlb = true;
182 return 0;
183 }
184
185
186 #if defined(TARGET_PPC64)
187 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
188 struct kvm_ppc_smmu_info *info)
189 {
190 CPUPPCState *env = &cpu->env;
191 CPUState *cs = CPU(cpu);
192
193 memset(info, 0, sizeof(*info));
194
195 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
196 * need to "guess" what the supported page sizes are.
197 *
198 * For that to work we make a few assumptions:
199 *
200 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
201 * KVM which only supports 4K and 16M pages, but supports them
202 * regardless of the backing store characteritics. We also don't
203 * support 1T segments.
204 *
205 * This is safe as if HV KVM ever supports that capability or PR
206 * KVM grows supports for more page/segment sizes, those versions
207 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
208 * will not hit this fallback
209 *
210 * - Else we are running HV KVM. This means we only support page
211 * sizes that fit in the backing store. Additionally we only
212 * advertize 64K pages if the processor is ARCH 2.06 and we assume
213 * P7 encodings for the SLB and hash table. Here too, we assume
214 * support for any newer processor will mean a kernel that
215 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
216 * this fallback.
217 */
218 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
219 /* No flags */
220 info->flags = 0;
221 info->slb_size = 64;
222
223 /* Standard 4k base page size segment */
224 info->sps[0].page_shift = 12;
225 info->sps[0].slb_enc = 0;
226 info->sps[0].enc[0].page_shift = 12;
227 info->sps[0].enc[0].pte_enc = 0;
228
229 /* Standard 16M large page size segment */
230 info->sps[1].page_shift = 24;
231 info->sps[1].slb_enc = SLB_VSID_L;
232 info->sps[1].enc[0].page_shift = 24;
233 info->sps[1].enc[0].pte_enc = 0;
234 } else {
235 int i = 0;
236
237 /* HV KVM has backing store size restrictions */
238 info->flags = KVM_PPC_PAGE_SIZES_REAL;
239
240 if (env->mmu_model & POWERPC_MMU_1TSEG) {
241 info->flags |= KVM_PPC_1T_SEGMENTS;
242 }
243
244 if (env->mmu_model == POWERPC_MMU_2_06) {
245 info->slb_size = 32;
246 } else {
247 info->slb_size = 64;
248 }
249
250 /* Standard 4k base page size segment */
251 info->sps[i].page_shift = 12;
252 info->sps[i].slb_enc = 0;
253 info->sps[i].enc[0].page_shift = 12;
254 info->sps[i].enc[0].pte_enc = 0;
255 i++;
256
257 /* 64K on MMU 2.06 */
258 if (env->mmu_model == POWERPC_MMU_2_06) {
259 info->sps[i].page_shift = 16;
260 info->sps[i].slb_enc = 0x110;
261 info->sps[i].enc[0].page_shift = 16;
262 info->sps[i].enc[0].pte_enc = 1;
263 i++;
264 }
265
266 /* Standard 16M large page size segment */
267 info->sps[i].page_shift = 24;
268 info->sps[i].slb_enc = SLB_VSID_L;
269 info->sps[i].enc[0].page_shift = 24;
270 info->sps[i].enc[0].pte_enc = 0;
271 }
272 }
273
274 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
275 {
276 CPUState *cs = CPU(cpu);
277 int ret;
278
279 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
280 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
281 if (ret == 0) {
282 return;
283 }
284 }
285
286 kvm_get_fallback_smmu_info(cpu, info);
287 }
288
289 static long getrampagesize(void)
290 {
291 struct statfs fs;
292 int ret;
293
294 if (!mem_path) {
295 /* guest RAM is backed by normal anonymous pages */
296 return getpagesize();
297 }
298
299 do {
300 ret = statfs(mem_path, &fs);
301 } while (ret != 0 && errno == EINTR);
302
303 if (ret != 0) {
304 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
305 strerror(errno));
306 exit(1);
307 }
308
309 #define HUGETLBFS_MAGIC 0x958458f6
310
311 if (fs.f_type != HUGETLBFS_MAGIC) {
312 /* Explicit mempath, but it's ordinary pages */
313 return getpagesize();
314 }
315
316 /* It's hugepage, return the huge page size */
317 return fs.f_bsize;
318 }
319
/*
 * Decide whether a page of 2^@shift bytes may be offered to the guest.
 * Unless @flags carries KVM_PPC_PAGE_SIZES_REAL every size is accepted;
 * with that flag the page must not be larger than @rampgsize.
 */
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (flags & KVM_PPC_PAGE_SIZES_REAL) {
        return (1ul << shift) <= rampgsize;
    }

    return true;
}
328
329 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
330 {
331 static struct kvm_ppc_smmu_info smmu_info;
332 static bool has_smmu_info;
333 CPUPPCState *env = &cpu->env;
334 long rampagesize;
335 int iq, ik, jq, jk;
336
337 /* We only handle page sizes for 64-bit server guests for now */
338 if (!(env->mmu_model & POWERPC_MMU_64)) {
339 return;
340 }
341
342 /* Collect MMU info from kernel if not already */
343 if (!has_smmu_info) {
344 kvm_get_smmu_info(cpu, &smmu_info);
345 has_smmu_info = true;
346 }
347
348 rampagesize = getrampagesize();
349
350 /* Convert to QEMU form */
351 memset(&env->sps, 0, sizeof(env->sps));
352
353 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
354 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
355 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
356
357 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
358 ksps->page_shift)) {
359 continue;
360 }
361 qsps->page_shift = ksps->page_shift;
362 qsps->slb_enc = ksps->slb_enc;
363 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
364 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
365 ksps->enc[jk].page_shift)) {
366 continue;
367 }
368 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
369 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
370 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
371 break;
372 }
373 }
374 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
375 break;
376 }
377 }
378 env->slb_nr = smmu_info.slb_size;
379 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
380 env->mmu_model |= POWERPC_MMU_1TSEG;
381 } else {
382 env->mmu_model &= ~POWERPC_MMU_1TSEG;
383 }
384 }
385 #else /* defined (TARGET_PPC64) */
386
387 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
388 {
389 }
390
391 #endif /* !defined (TARGET_PPC64) */
392
393 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
394 {
395 return cpu->cpu_index;
396 }
397
398 int kvm_arch_init_vcpu(CPUState *cs)
399 {
400 PowerPCCPU *cpu = POWERPC_CPU(cs);
401 CPUPPCState *cenv = &cpu->env;
402 int ret;
403
404 /* Gather server mmu info from KVM and update the CPU state */
405 kvm_fixup_page_sizes(cpu);
406
407 /* Synchronize sregs with kvm */
408 ret = kvm_arch_sync_sregs(cpu);
409 if (ret) {
410 return ret;
411 }
412
413 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
414
415 /* Some targets support access to KVM's guest TLB. */
416 switch (cenv->mmu_model) {
417 case POWERPC_MMU_BOOKE206:
418 ret = kvm_booke206_tlb_init(cpu);
419 break;
420 default:
421 break;
422 }
423
424 return ret;
425 }
426
427 void kvm_arch_reset_vcpu(CPUState *cpu)
428 {
429 }
430
431 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
432 {
433 CPUPPCState *env = &cpu->env;
434 CPUState *cs = CPU(cpu);
435 struct kvm_dirty_tlb dirty_tlb;
436 unsigned char *bitmap;
437 int ret;
438
439 if (!env->kvm_sw_tlb) {
440 return;
441 }
442
443 bitmap = g_malloc((env->nb_tlb + 7) / 8);
444 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
445
446 dirty_tlb.bitmap = (uintptr_t)bitmap;
447 dirty_tlb.num_dirty = env->nb_tlb;
448
449 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
450 if (ret) {
451 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
452 __func__, strerror(-ret));
453 }
454
455 g_free(bitmap);
456 }
457
458 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
459 {
460 PowerPCCPU *cpu = POWERPC_CPU(cs);
461 CPUPPCState *env = &cpu->env;
462 union {
463 uint32_t u32;
464 uint64_t u64;
465 } val;
466 struct kvm_one_reg reg = {
467 .id = id,
468 .addr = (uintptr_t) &val,
469 };
470 int ret;
471
472 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
473 if (ret != 0) {
474 fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
475 spr, strerror(errno));
476 } else {
477 switch (id & KVM_REG_SIZE_MASK) {
478 case KVM_REG_SIZE_U32:
479 env->spr[spr] = val.u32;
480 break;
481
482 case KVM_REG_SIZE_U64:
483 env->spr[spr] = val.u64;
484 break;
485
486 default:
487 /* Don't handle this size yet */
488 abort();
489 }
490 }
491 }
492
493 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
494 {
495 PowerPCCPU *cpu = POWERPC_CPU(cs);
496 CPUPPCState *env = &cpu->env;
497 union {
498 uint32_t u32;
499 uint64_t u64;
500 } val;
501 struct kvm_one_reg reg = {
502 .id = id,
503 .addr = (uintptr_t) &val,
504 };
505 int ret;
506
507 switch (id & KVM_REG_SIZE_MASK) {
508 case KVM_REG_SIZE_U32:
509 val.u32 = env->spr[spr];
510 break;
511
512 case KVM_REG_SIZE_U64:
513 val.u64 = env->spr[spr];
514 break;
515
516 default:
517 /* Don't handle this size yet */
518 abort();
519 }
520
521 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
522 if (ret != 0) {
523 fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
524 spr, strerror(errno));
525 }
526 }
527
528 static int kvm_put_fp(CPUState *cs)
529 {
530 PowerPCCPU *cpu = POWERPC_CPU(cs);
531 CPUPPCState *env = &cpu->env;
532 struct kvm_one_reg reg;
533 int i;
534 int ret;
535
536 if (env->insns_flags & PPC_FLOAT) {
537 uint64_t fpscr = env->fpscr;
538 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
539
540 reg.id = KVM_REG_PPC_FPSCR;
541 reg.addr = (uintptr_t)&fpscr;
542 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
543 if (ret < 0) {
544 dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
545 return ret;
546 }
547
548 for (i = 0; i < 32; i++) {
549 uint64_t vsr[2];
550
551 vsr[0] = float64_val(env->fpr[i]);
552 vsr[1] = env->vsr[i];
553 reg.addr = (uintptr_t) &vsr;
554 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
555
556 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
557 if (ret < 0) {
558 dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
559 i, strerror(errno));
560 return ret;
561 }
562 }
563 }
564
565 if (env->insns_flags & PPC_ALTIVEC) {
566 reg.id = KVM_REG_PPC_VSCR;
567 reg.addr = (uintptr_t)&env->vscr;
568 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
569 if (ret < 0) {
570 dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
571 return ret;
572 }
573
574 for (i = 0; i < 32; i++) {
575 reg.id = KVM_REG_PPC_VR(i);
576 reg.addr = (uintptr_t)&env->avr[i];
577 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
578 if (ret < 0) {
579 dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
580 return ret;
581 }
582 }
583 }
584
585 return 0;
586 }
587
588 static int kvm_get_fp(CPUState *cs)
589 {
590 PowerPCCPU *cpu = POWERPC_CPU(cs);
591 CPUPPCState *env = &cpu->env;
592 struct kvm_one_reg reg;
593 int i;
594 int ret;
595
596 if (env->insns_flags & PPC_FLOAT) {
597 uint64_t fpscr;
598 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
599
600 reg.id = KVM_REG_PPC_FPSCR;
601 reg.addr = (uintptr_t)&fpscr;
602 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
603 if (ret < 0) {
604 dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
605 return ret;
606 } else {
607 env->fpscr = fpscr;
608 }
609
610 for (i = 0; i < 32; i++) {
611 uint64_t vsr[2];
612
613 reg.addr = (uintptr_t) &vsr;
614 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
615
616 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
617 if (ret < 0) {
618 dprintf("Unable to get %s%d from KVM: %s\n",
619 vsx ? "VSR" : "FPR", i, strerror(errno));
620 return ret;
621 } else {
622 env->fpr[i] = vsr[0];
623 if (vsx) {
624 env->vsr[i] = vsr[1];
625 }
626 }
627 }
628 }
629
630 if (env->insns_flags & PPC_ALTIVEC) {
631 reg.id = KVM_REG_PPC_VSCR;
632 reg.addr = (uintptr_t)&env->vscr;
633 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
634 if (ret < 0) {
635 dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
636 return ret;
637 }
638
639 for (i = 0; i < 32; i++) {
640 reg.id = KVM_REG_PPC_VR(i);
641 reg.addr = (uintptr_t)&env->avr[i];
642 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643 if (ret < 0) {
644 dprintf("Unable to get VR%d from KVM: %s\n",
645 i, strerror(errno));
646 return ret;
647 }
648 }
649 }
650
651 return 0;
652 }
653
654 int kvm_arch_put_registers(CPUState *cs, int level)
655 {
656 PowerPCCPU *cpu = POWERPC_CPU(cs);
657 CPUPPCState *env = &cpu->env;
658 struct kvm_regs regs;
659 int ret;
660 int i;
661
662 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
663 if (ret < 0) {
664 return ret;
665 }
666
667 regs.ctr = env->ctr;
668 regs.lr = env->lr;
669 regs.xer = cpu_read_xer(env);
670 regs.msr = env->msr;
671 regs.pc = env->nip;
672
673 regs.srr0 = env->spr[SPR_SRR0];
674 regs.srr1 = env->spr[SPR_SRR1];
675
676 regs.sprg0 = env->spr[SPR_SPRG0];
677 regs.sprg1 = env->spr[SPR_SPRG1];
678 regs.sprg2 = env->spr[SPR_SPRG2];
679 regs.sprg3 = env->spr[SPR_SPRG3];
680 regs.sprg4 = env->spr[SPR_SPRG4];
681 regs.sprg5 = env->spr[SPR_SPRG5];
682 regs.sprg6 = env->spr[SPR_SPRG6];
683 regs.sprg7 = env->spr[SPR_SPRG7];
684
685 regs.pid = env->spr[SPR_BOOKE_PID];
686
687 for (i = 0;i < 32; i++)
688 regs.gpr[i] = env->gpr[i];
689
690 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
691 if (ret < 0)
692 return ret;
693
694 kvm_put_fp(cs);
695
696 if (env->tlb_dirty) {
697 kvm_sw_tlb_put(cpu);
698 env->tlb_dirty = false;
699 }
700
701 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
702 struct kvm_sregs sregs;
703
704 sregs.pvr = env->spr[SPR_PVR];
705
706 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
707
708 /* Sync SLB */
709 #ifdef TARGET_PPC64
710 for (i = 0; i < 64; i++) {
711 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
712 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
713 }
714 #endif
715
716 /* Sync SRs */
717 for (i = 0; i < 16; i++) {
718 sregs.u.s.ppc32.sr[i] = env->sr[i];
719 }
720
721 /* Sync BATs */
722 for (i = 0; i < 8; i++) {
723 /* Beware. We have to swap upper and lower bits here */
724 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
725 | env->DBAT[1][i];
726 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
727 | env->IBAT[1][i];
728 }
729
730 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
731 if (ret) {
732 return ret;
733 }
734 }
735
736 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
737 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
738 }
739
740 if (cap_one_reg) {
741 int i;
742
743 /* We deliberately ignore errors here, for kernels which have
744 * the ONE_REG calls, but don't support the specific
745 * registers, there's a reasonable chance things will still
746 * work, at least until we try to migrate. */
747 for (i = 0; i < 1024; i++) {
748 uint64_t id = env->spr_cb[i].one_reg_id;
749
750 if (id != 0) {
751 kvm_put_one_spr(cs, id, i);
752 }
753 }
754 }
755
756 return ret;
757 }
758
759 int kvm_arch_get_registers(CPUState *cs)
760 {
761 PowerPCCPU *cpu = POWERPC_CPU(cs);
762 CPUPPCState *env = &cpu->env;
763 struct kvm_regs regs;
764 struct kvm_sregs sregs;
765 uint32_t cr;
766 int i, ret;
767
768 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
769 if (ret < 0)
770 return ret;
771
772 cr = regs.cr;
773 for (i = 7; i >= 0; i--) {
774 env->crf[i] = cr & 15;
775 cr >>= 4;
776 }
777
778 env->ctr = regs.ctr;
779 env->lr = regs.lr;
780 cpu_write_xer(env, regs.xer);
781 env->msr = regs.msr;
782 env->nip = regs.pc;
783
784 env->spr[SPR_SRR0] = regs.srr0;
785 env->spr[SPR_SRR1] = regs.srr1;
786
787 env->spr[SPR_SPRG0] = regs.sprg0;
788 env->spr[SPR_SPRG1] = regs.sprg1;
789 env->spr[SPR_SPRG2] = regs.sprg2;
790 env->spr[SPR_SPRG3] = regs.sprg3;
791 env->spr[SPR_SPRG4] = regs.sprg4;
792 env->spr[SPR_SPRG5] = regs.sprg5;
793 env->spr[SPR_SPRG6] = regs.sprg6;
794 env->spr[SPR_SPRG7] = regs.sprg7;
795
796 env->spr[SPR_BOOKE_PID] = regs.pid;
797
798 for (i = 0;i < 32; i++)
799 env->gpr[i] = regs.gpr[i];
800
801 kvm_get_fp(cs);
802
803 if (cap_booke_sregs) {
804 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
805 if (ret < 0) {
806 return ret;
807 }
808
809 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
810 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
811 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
812 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
813 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
814 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
815 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
816 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
817 env->spr[SPR_DECR] = sregs.u.e.dec;
818 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
819 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
820 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
821 }
822
823 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
824 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
825 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
826 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
827 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
828 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
829 }
830
831 if (sregs.u.e.features & KVM_SREGS_E_64) {
832 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
833 }
834
835 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
836 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
837 }
838
839 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
840 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
841 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
842 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
843 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
844 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
845 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
846 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
847 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
848 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
849 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
850 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
851 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
852 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
853 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
854 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
855 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
856
857 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
858 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
859 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
860 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
861 }
862
863 if (sregs.u.e.features & KVM_SREGS_E_PM) {
864 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
865 }
866
867 if (sregs.u.e.features & KVM_SREGS_E_PC) {
868 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
869 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
870 }
871 }
872
873 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
874 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
875 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
876 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
877 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
878 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
879 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
880 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
881 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
882 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
883 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
884 }
885
886 if (sregs.u.e.features & KVM_SREGS_EXP) {
887 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
888 }
889
890 if (sregs.u.e.features & KVM_SREGS_E_PD) {
891 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
892 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
893 }
894
895 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
896 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
897 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
898 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
899
900 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
901 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
902 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
903 }
904 }
905 }
906
907 if (cap_segstate) {
908 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
909 if (ret < 0) {
910 return ret;
911 }
912
913 ppc_store_sdr1(env, sregs.u.s.sdr1);
914
915 /* Sync SLB */
916 #ifdef TARGET_PPC64
917 for (i = 0; i < 64; i++) {
918 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
919 sregs.u.s.ppc64.slb[i].slbv);
920 }
921 #endif
922
923 /* Sync SRs */
924 for (i = 0; i < 16; i++) {
925 env->sr[i] = sregs.u.s.ppc32.sr[i];
926 }
927
928 /* Sync BATs */
929 for (i = 0; i < 8; i++) {
930 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
931 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
932 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
933 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
934 }
935 }
936
937 if (cap_hior) {
938 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
939 }
940
941 if (cap_one_reg) {
942 int i;
943
944 /* We deliberately ignore errors here, for kernels which have
945 * the ONE_REG calls, but don't support the specific
946 * registers, there's a reasonable chance things will still
947 * work, at least until we try to migrate. */
948 for (i = 0; i < 1024; i++) {
949 uint64_t id = env->spr_cb[i].one_reg_id;
950
951 if (id != 0) {
952 kvm_get_one_spr(cs, id, i);
953 }
954 }
955 }
956
957 return 0;
958 }
959
960 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
961 {
962 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
963
964 if (irq != PPC_INTERRUPT_EXT) {
965 return 0;
966 }
967
968 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
969 return 0;
970 }
971
972 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
973
974 return 0;
975 }
976
977 #if defined(TARGET_PPCEMB)
978 #define PPC_INPUT_INT PPC40x_INPUT_INT
979 #elif defined(TARGET_PPC64)
980 #define PPC_INPUT_INT PPC970_INPUT_INT
981 #else
982 #define PPC_INPUT_INT PPC6xx_INPUT_INT
983 #endif
984
985 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
986 {
987 PowerPCCPU *cpu = POWERPC_CPU(cs);
988 CPUPPCState *env = &cpu->env;
989 int r;
990 unsigned irq;
991
992 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
993 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
994 if (!cap_interrupt_level &&
995 run->ready_for_interrupt_injection &&
996 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
997 (env->irq_input_state & (1<<PPC_INPUT_INT)))
998 {
999 /* For now KVM disregards the 'irq' argument. However, in the
1000 * future KVM could cache it in-kernel to avoid a heavyweight exit
1001 * when reading the UIC.
1002 */
1003 irq = KVM_INTERRUPT_SET;
1004
1005 dprintf("injected interrupt %d\n", irq);
1006 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1007 if (r < 0) {
1008 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1009 }
1010
1011 /* Always wake up soon in case the interrupt was level based */
1012 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
1013 (get_ticks_per_sec() / 50));
1014 }
1015
1016 /* We don't know if there are more interrupts pending after this. However,
1017 * the guest will return to userspace in the course of handling this one
1018 * anyways, so we will get a chance to deliver the rest. */
1019 }
1020
1021 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1022 {
1023 }
1024
1025 int kvm_arch_process_async_events(CPUState *cs)
1026 {
1027 return cs->halted;
1028 }
1029
1030 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1031 {
1032 CPUState *cs = CPU(cpu);
1033 CPUPPCState *env = &cpu->env;
1034
1035 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1036 cs->halted = 1;
1037 env->exception_index = EXCP_HLT;
1038 }
1039
1040 return 0;
1041 }
1042
1043 /* map dcr access to existing qemu dcr emulation */
1044 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1045 {
1046 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1047 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1048
1049 return 0;
1050 }
1051
1052 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1053 {
1054 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1055 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1056
1057 return 0;
1058 }
1059
1060 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1061 {
1062 PowerPCCPU *cpu = POWERPC_CPU(cs);
1063 CPUPPCState *env = &cpu->env;
1064 int ret;
1065
1066 switch (run->exit_reason) {
1067 case KVM_EXIT_DCR:
1068 if (run->dcr.is_write) {
1069 dprintf("handle dcr write\n");
1070 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1071 } else {
1072 dprintf("handle dcr read\n");
1073 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1074 }
1075 break;
1076 case KVM_EXIT_HLT:
1077 dprintf("handle halt\n");
1078 ret = kvmppc_handle_halt(cpu);
1079 break;
1080 #ifdef CONFIG_PSERIES
1081 case KVM_EXIT_PAPR_HCALL:
1082 dprintf("handle PAPR hypercall\n");
1083 run->papr_hcall.ret = spapr_hypercall(cpu,
1084 run->papr_hcall.nr,
1085 run->papr_hcall.args);
1086 ret = 0;
1087 break;
1088 #endif
1089 case KVM_EXIT_EPR:
1090 dprintf("handle epr\n");
1091 run->epr.epr = ldl_phys(env->mpic_iack);
1092 ret = 0;
1093 break;
1094 default:
1095 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1096 ret = -1;
1097 break;
1098 }
1099
1100 return ret;
1101 }
1102
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy it
 * (the whole line) into @value, a buffer of @len bytes, using pstrcpy().
 *
 * Returns 0 when such a line was found, -1 when the file cannot be opened
 * or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f;

    f = fopen("/proc/cpuinfo", "r");
    if (f == NULL) {
        return -1;
    }

    while (fgets(line, sizeof(line), f) != NULL) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* An empty string from fgets() ends the scan as well. */
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(f);

    return ret;
}
1130
/*
 * Return the number parsed (with atoi) from the text after the ':' on the
 * "timebase" line of /proc/cpuinfo.  Falls back to get_ticks_per_sec()
 * when that line is missing or contains no ':'.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *colon;
    uint32_t fallback = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line)) != 0) {
        return fallback;
    }

    colon = strchr(line, ':');
    if (colon == NULL) {
        return fallback;
    }

    return atoi(colon + 1);
}
1150
1151 /* Try to find a device tree node for a CPU with clock-frequency property */
1152 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1153 {
1154 struct dirent *dirp;
1155 DIR *dp;
1156
1157 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1158 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1159 return -1;
1160 }
1161
1162 buf[0] = '\0';
1163 while ((dirp = readdir(dp)) != NULL) {
1164 FILE *f;
1165 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1166 dirp->d_name);
1167 f = fopen(buf, "r");
1168 if (f) {
1169 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1170 fclose(f);
1171 break;
1172 }
1173 buf[0] = '\0';
1174 }
1175 closedir(dp);
1176 if (buf[0] == '\0') {
1177 printf("Unknown host!\n");
1178 return -1;
1179 }
1180
1181 return 0;
1182 }
1183
1184 /* Read a CPU node property from the host device tree that's a single
1185 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1186 * (can't find or open the property, or doesn't understand the
1187 * format) */
1188 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1189 {
1190 char buf[PATH_MAX];
1191 union {
1192 uint32_t v32;
1193 uint64_t v64;
1194 } u;
1195 FILE *f;
1196 int len;
1197
1198 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1199 return -1;
1200 }
1201
1202 strncat(buf, "/", sizeof(buf) - strlen(buf));
1203 strncat(buf, propname, sizeof(buf) - strlen(buf));
1204
1205 f = fopen(buf, "rb");
1206 if (!f) {
1207 return -1;
1208 }
1209
1210 len = fread(&u, 1, sizeof(u), f);
1211 fclose(f);
1212 switch (len) {
1213 case 4:
1214 /* property is a 32-bit quantity */
1215 return be32_to_cpu(u.v32);
1216 case 8:
1217 return be64_to_cpu(u.v64);
1218 }
1219
1220 return 0;
1221 }
1222
/* Return what kvmppc_read_int_cpu_dt() yields for "clock-frequency". */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1227
/*
 * Return what kvmppc_read_int_cpu_dt() yields for "ibm,vmx", truncated to
 * 32 bits (so a 64-bit -1 from the helper comes back as 0xffffffff).
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
1232
/*
 * Return what kvmppc_read_int_cpu_dt() yields for "ibm,dfp", truncated to
 * 32 bits (so a 64-bit -1 from the helper comes back as 0xffffffff).
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
1237
1238 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1239 {
1240 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1241 CPUState *cs = CPU(cpu);
1242
1243 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1244 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1245 return 0;
1246 }
1247
1248 return 1;
1249 }
1250
1251 int kvmppc_get_hasidle(CPUPPCState *env)
1252 {
1253 struct kvm_ppc_pvinfo pvinfo;
1254
1255 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1256 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1257 return 1;
1258 }
1259
1260 return 0;
1261 }
1262
1263 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1264 {
1265 uint32_t *hc = (uint32_t*)buf;
1266 struct kvm_ppc_pvinfo pvinfo;
1267
1268 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1269 memcpy(buf, pvinfo.hcall, buf_len);
1270 return 0;
1271 }
1272
1273 /*
1274 * Fallback to always fail hypercalls:
1275 *
1276 * li r3, -1
1277 * nop
1278 * nop
1279 * nop
1280 */
1281
1282 hc[0] = 0x3860ffff;
1283 hc[1] = 0x60000000;
1284 hc[2] = 0x60000000;
1285 hc[3] = 0x60000000;
1286
1287 return 0;
1288 }
1289
1290 void kvmppc_set_papr(PowerPCCPU *cpu)
1291 {
1292 CPUPPCState *env = &cpu->env;
1293 CPUState *cs = CPU(cpu);
1294 struct kvm_enable_cap cap = {};
1295 int ret;
1296
1297 cap.cap = KVM_CAP_PPC_PAPR;
1298 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1299
1300 if (ret) {
1301 cpu_abort(env, "This KVM version does not support PAPR\n");
1302 }
1303 }
1304
1305 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1306 {
1307 CPUPPCState *env = &cpu->env;
1308 CPUState *cs = CPU(cpu);
1309 struct kvm_enable_cap cap = {};
1310 int ret;
1311
1312 cap.cap = KVM_CAP_PPC_EPR;
1313 cap.args[0] = mpic_proxy;
1314 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1315
1316 if (ret && mpic_proxy) {
1317 cpu_abort(env, "This KVM version does not support EPR\n");
1318 }
1319 }
1320
1321 int kvmppc_smt_threads(void)
1322 {
1323 return cap_ppc_smt ? cap_ppc_smt : 1;
1324 }
1325
1326 #ifdef TARGET_PPC64
1327 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1328 {
1329 void *rma;
1330 off_t size;
1331 int fd;
1332 struct kvm_allocate_rma ret;
1333 MemoryRegion *rma_region;
1334
1335 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1336 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1337 * not necessary on this hardware
1338 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1339 *
1340 * FIXME: We should allow the user to force contiguous RMA
1341 * allocation in the cap_ppc_rma==1 case.
1342 */
1343 if (cap_ppc_rma < 2) {
1344 return 0;
1345 }
1346
1347 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1348 if (fd < 0) {
1349 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1350 strerror(errno));
1351 return -1;
1352 }
1353
1354 size = MIN(ret.rma_size, 256ul << 20);
1355
1356 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1357 if (rma == MAP_FAILED) {
1358 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1359 return -1;
1360 };
1361
1362 rma_region = g_new(MemoryRegion, 1);
1363 memory_region_init_ram_ptr(rma_region, name, size, rma);
1364 vmstate_register_ram_global(rma_region);
1365 memory_region_add_subregion(sysmem, 0, rma_region);
1366
1367 return size;
1368 }
1369
1370 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1371 {
1372 if (cap_ppc_rma >= 2) {
1373 return current_size;
1374 }
1375 return MIN(current_size,
1376 getrampagesize() << (hash_shift - 7));
1377 }
1378 #endif
1379
1380 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1381 {
1382 struct kvm_create_spapr_tce args = {
1383 .liobn = liobn,
1384 .window_size = window_size,
1385 };
1386 long len;
1387 int fd;
1388 void *table;
1389
1390 /* Must set fd to -1 so we don't try to munmap when called for
1391 * destroying the table, which the upper layers -will- do
1392 */
1393 *pfd = -1;
1394 if (!cap_spapr_tce) {
1395 return NULL;
1396 }
1397
1398 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1399 if (fd < 0) {
1400 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1401 liobn);
1402 return NULL;
1403 }
1404
1405 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1406 /* FIXME: round this up to page size */
1407
1408 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1409 if (table == MAP_FAILED) {
1410 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1411 liobn);
1412 close(fd);
1413 return NULL;
1414 }
1415
1416 *pfd = fd;
1417 return table;
1418 }
1419
1420 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1421 {
1422 long len;
1423
1424 if (fd < 0) {
1425 return -1;
1426 }
1427
1428 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1429 if ((munmap(table, len) < 0) ||
1430 (close(fd) < 0)) {
1431 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1432 strerror(errno));
1433 /* Leak the table */
1434 }
1435
1436 return 0;
1437 }
1438
1439 int kvmppc_reset_htab(int shift_hint)
1440 {
1441 uint32_t shift = shift_hint;
1442
1443 if (!kvm_enabled()) {
1444 /* Full emulation, tell caller to allocate htab itself */
1445 return 0;
1446 }
1447 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1448 int ret;
1449 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1450 if (ret == -ENOTTY) {
1451 /* At least some versions of PR KVM advertise the
1452 * capability, but don't implement the ioctl(). Oops.
1453 * Return 0 so that we allocate the htab in qemu, as is
1454 * correct for PR. */
1455 return 0;
1456 } else if (ret < 0) {
1457 return ret;
1458 }
1459 return shift;
1460 }
1461
1462 /* We have a kernel that predates the htab reset calls. For PR
1463 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1464 * this era, it has allocated a 16MB fixed size hash table
1465 * already. Kernels of this era have the GET_PVINFO capability
1466 * only on PR, so we use this hack to determine the right
1467 * answer */
1468 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1469 /* PR - tell caller to allocate htab */
1470 return 0;
1471 } else {
1472 /* HV - assume 16MB kernel allocated htab */
1473 return 24;
1474 }
1475 }
1476
1477 static inline uint32_t mfpvr(void)
1478 {
1479 uint32_t pvr;
1480
1481 asm ("mfpvr %0"
1482 : "=r"(pvr));
1483 return pvr;
1484 }
1485
/* Set the bits of @flags in *@word when @on is true, clear them otherwise. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1494
1495 static void kvmppc_host_cpu_initfn(Object *obj)
1496 {
1497 assert(kvm_enabled());
1498 }
1499
1500 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1501 {
1502 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1503 uint32_t vmx = kvmppc_get_vmx();
1504 uint32_t dfp = kvmppc_get_dfp();
1505
1506 /* Now fix up the class with information we can query from the host */
1507
1508 if (vmx != -1) {
1509 /* Only override when we know what the host supports */
1510 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1511 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1512 }
1513 if (dfp != -1) {
1514 /* Only override when we know what the host supports */
1515 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1516 }
1517 }
1518
1519 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1520 {
1521 CPUState *cs = CPU(cpu);
1522 int smt;
1523
1524 /* Adjust cpu index for SMT */
1525 smt = kvmppc_smt_threads();
1526 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1527 + (cs->cpu_index % smp_threads);
1528
1529 return 0;
1530 }
1531
1532 static int kvm_ppc_register_host_cpu_type(void)
1533 {
1534 TypeInfo type_info = {
1535 .name = TYPE_HOST_POWERPC_CPU,
1536 .instance_init = kvmppc_host_cpu_initfn,
1537 .class_init = kvmppc_host_cpu_class_init,
1538 };
1539 uint32_t host_pvr = mfpvr();
1540 PowerPCCPUClass *pvr_pcc;
1541
1542 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1543 if (pvr_pcc == NULL) {
1544 return -1;
1545 }
1546 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1547 type_register(&type_info);
1548 return 0;
1549 }
1550
1551
1552 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1553 {
1554 return true;
1555 }
1556
1557 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1558 {
1559 return 1;
1560 }
1561
/* Unconditionally returns 1; neither argument is consulted. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}