tests: virtio-9p: rename PCI configuration test
[qemu.git] / target / ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
3 *
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
6 *
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
11 *
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
14 *
15 */
16
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21
22 #include <linux/kvm.h>
23
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
35
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
49 #endif
50
//#define DEBUG_KVM

#ifdef DEBUG_KVM
/* Debug logging to stderr; compiles to a no-op unless DEBUG_KVM is defined */
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

/* Host procfs location of the CPU device-tree nodes */
#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"

/* No KVM capability is strictly required on PPC: terminator entry only */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
66
/* Cached results of host KVM capability probes; filled in by kvm_arch_init()
 * (except cap_papr, which is set later by kvmppc_set_papr()). */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm; /* Hardware transactional memory support */

/* Opcode KVM uses for s/w breakpoints, fetched via KVM_REG_PPC_DEBUG_INST */
static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
97
98 static void kvm_kick_cpu(void *opaque)
99 {
100 PowerPCCPU *cpu = opaque;
101
102 qemu_cpu_kick(CPU(cpu));
103 }
104
105 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
106 * should only be used for fallback tests - generally we should use
107 * explicit capabilities for the features we want, rather than
108 * assuming what is/isn't available depending on the KVM variant. */
109 static bool kvmppc_is_pr(KVMState *ks)
110 {
111 /* Assume KVM-PR if the GET_PVINFO capability is available */
112 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
113 }
114
115 static int kvm_ppc_register_host_cpu_type(void);
116
/* One-time architecture init: probe the host kernel for every optional KVM
 * capability this file uses and register the host CPU model.  Always
 * returns 0. */
int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false; /* deliberately not probed: left disabled */
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    /* HTM is a VM-scoped extension, hence kvm_vm_check_extension() */
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
147
148 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
149 {
150 CPUPPCState *cenv = &cpu->env;
151 CPUState *cs = CPU(cpu);
152 struct kvm_sregs sregs;
153 int ret;
154
155 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
156 /* What we're really trying to say is "if we're on BookE, we use
157 the native PVR for now". This is the only sane way to check
158 it though, so we potentially confuse users that they can run
159 BookE guests on BookS. Let's hope nobody dares enough :) */
160 return 0;
161 } else {
162 if (!cap_segstate) {
163 fprintf(stderr, "kvm error: missing PVR setting capability\n");
164 return -ENOSYS;
165 }
166 }
167
168 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
169 if (ret) {
170 return ret;
171 }
172
173 sregs.pvr = cenv->spr[SPR_PVR];
174 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
175 }
176
177 /* Set up a shared TLB array with KVM */
178 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
179 {
180 CPUPPCState *env = &cpu->env;
181 CPUState *cs = CPU(cpu);
182 struct kvm_book3e_206_tlb_params params = {};
183 struct kvm_config_tlb cfg = {};
184 unsigned int entries = 0;
185 int ret, i;
186
187 if (!kvm_enabled() ||
188 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
189 return 0;
190 }
191
192 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
193
194 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
195 params.tlb_sizes[i] = booke206_tlb_size(env, i);
196 params.tlb_ways[i] = booke206_tlb_ways(env, i);
197 entries += params.tlb_sizes[i];
198 }
199
200 assert(entries == env->nb_tlb);
201 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
202
203 env->tlb_dirty = true;
204
205 cfg.array = (uintptr_t)env->tlb.tlbm;
206 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
207 cfg.params = (uintptr_t)&params;
208 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
209
210 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
211 if (ret < 0) {
212 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
213 __func__, strerror(-ret));
214 return ret;
215 }
216
217 env->kvm_sw_tlb = true;
218 return 0;
219 }
220
221
222 #if defined(TARGET_PPC64)
/* Fill *info with a guessed MMU geometry for kernels that predate the
 * KVM_PPC_GET_SMMU_INFO ioctl.  See the big comment below for the
 * assumptions made for PR vs HV KVM. */
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteritics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows supports for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvmppc_is_pr(cs->kvm_state)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        /* ISA 2.06/2.07 (POWER7/POWER8) models get a 32-entry SLB */
        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 and later */
        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
310
311 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
312 {
313 CPUState *cs = CPU(cpu);
314 int ret;
315
316 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
317 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
318 if (ret == 0) {
319 return;
320 }
321 }
322
323 kvm_get_fallback_smmu_info(cpu, info);
324 }
325
/* Return the page size backing mem_path: the hugepage size if it lives on
 * hugetlbfs, the normal host page size otherwise.  Exits on statfs failure. */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    /* statfs() may be interrupted by a signal; retry on EINTR */
    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type == HUGETLBFS_MAGIC) {
        /* It's hugepage, return the huge page size */
        return fs.f_bsize;
    }

    /* Explicit mempath, but it's ordinary pages */
    return getpagesize();
}
351
352 /*
353 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
354 * may or may not name the same files / on the same filesystem now as
355 * when we actually open and map them. Iterate over the file
356 * descriptors instead, and use qemu_fd_getpagesize().
357 */
358 static int find_max_supported_pagesize(Object *obj, void *opaque)
359 {
360 char *mem_path;
361 long *hpsize_min = opaque;
362
363 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
364 mem_path = object_property_get_str(obj, "mem-path", NULL);
365 if (mem_path) {
366 long hpsize = gethugepagesize(mem_path);
367 if (hpsize < *hpsize_min) {
368 *hpsize_min = hpsize;
369 }
370 } else {
371 *hpsize_min = getpagesize();
372 }
373 }
374
375 return 0;
376 }
377
/* Determine the smallest page size backing any guest RAM region, taking
 * both the main RAM (mem_path) and any memory-backend objects into account.
 */
static long getrampagesize(void)
{
    long hpsize = LONG_MAX;
    long mainrampagesize;
    Object *memdev_root;

    if (mem_path) {
        mainrampagesize = gethugepagesize(mem_path);
    } else {
        mainrampagesize = getpagesize();
    }

    /* it's possible we have memory-backend objects with
     * hugepage-backed RAM. these may get mapped into system
     * address space via -numa parameters or memory hotplug
     * hooks. we want to take these into account, but we
     * also want to make sure these supported hugepage
     * sizes are applicable across the entire range of memory
     * we may boot from, so we take the min across all
     * backends, and assume normal pages in cases where a
     * backend isn't backed by hugepages.
     */
    memdev_root = object_resolve_path("/objects", NULL);
    if (memdev_root) {
        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
    }
    if (hpsize == LONG_MAX) {
        /* No additional memory regions found ==> Report main RAM page size */
        return mainrampagesize;
    }

    /* If NUMA is disabled or the NUMA nodes are not backed with a
     * memory-backend, then there is at least one node using "normal" RAM,
     * so if its page size is smaller we have got to report that size instead.
     */
    if (hpsize > mainrampagesize &&
        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
        static bool warned;
        if (!warned) {
            error_report("Huge page support disabled (n/a for main memory).");
            warned = true;
        }
        return mainrampagesize;
    }

    return hpsize;
}
425
426 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
427 {
428 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
429 return true;
430 }
431
432 return (1ul << shift) <= rampgsize;
433 }
434
/* Intersect the kernel-reported MMU page-size tables with what the RAM
 * backing store can support, and write the result into env->sps.  Also
 * trims 1T-segment and 64K-page MMU features the host can't provide. */
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    /* The SMMU geometry is per-host, so the kernel is queried only once */
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;
    bool has_64k_pages = false;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        env->ci_large_pages = getpagesize() >= 0x10000;
    }

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    /* ik/jk walk the kernel tables; iq/jq index the filtered QEMU copy */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        /* Skip segment sizes the backing store cannot satisfy */
        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            if (ksps->enc[jk].page_shift == 16) {
                has_64k_pages = true;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
    if (!has_64k_pages) {
        env->mmu_model &= ~POWERPC_MMU_64K;
    }
}
507 #else /* defined (TARGET_PPC64) */
508
/* Page-size fixup only applies to 64-bit server guests; no-op otherwise */
static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}
512
513 #endif /* !defined (TARGET_PPC64) */
514
515 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
516 {
517 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
518 }
519
/* e500 supports 2 h/w breakpoint and 2 watchpoint.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* Table of hardware debug points currently registered with KVM */
static struct HWBreakpoint {
    target_ulong addr; /* guest address being watched */
    int type;          /* presumably a GDB breakpoint/watchpoint kind -- verify against users */
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint; /* host limit, set by kvmppc_hw_debug_points_init() */
static int max_hw_watchpoint; /* host limit, set by kvmppc_hw_debug_points_init() */
static int nb_hw_breakpoint;  /* in-use count -- maintained elsewhere in this file */
static int nb_hw_watchpoint;  /* in-use count -- maintained elsewhere in this file */
538
539 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
540 {
541 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
542 max_hw_breakpoint = 2;
543 max_hw_watchpoint = 2;
544 }
545
546 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
547 fprintf(stderr, "Error initializing h/w breakpoints\n");
548 return;
549 }
550 }
551
/* Per-vCPU KVM setup: page-size fixup, sregs sync, idle-kick timer and
 * MMU-model-specific initialisation.  Returns 0 or a negative error. */
int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    /* Workaround timer for missed level-interrupt wakeups; see the comment
     * above idle_timer */
    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /* KVM-HV has transactional memory on POWER8 also without the
             * KVM_CAP_PPC_HTM extension, so enable it here instead. */
            cap_htm = true;
        }
        break;
    default:
        break;
    }

    /* Learn the opcode KVM uses for software breakpoints */
    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}
594
595 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
596 {
597 CPUPPCState *env = &cpu->env;
598 CPUState *cs = CPU(cpu);
599 struct kvm_dirty_tlb dirty_tlb;
600 unsigned char *bitmap;
601 int ret;
602
603 if (!env->kvm_sw_tlb) {
604 return;
605 }
606
607 bitmap = g_malloc((env->nb_tlb + 7) / 8);
608 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
609
610 dirty_tlb.bitmap = (uintptr_t)bitmap;
611 dirty_tlb.num_dirty = env->nb_tlb;
612
613 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
614 if (ret) {
615 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
616 __func__, strerror(-ret));
617 }
618
619 g_free(bitmap);
620 }
621
622 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
623 {
624 PowerPCCPU *cpu = POWERPC_CPU(cs);
625 CPUPPCState *env = &cpu->env;
626 union {
627 uint32_t u32;
628 uint64_t u64;
629 } val;
630 struct kvm_one_reg reg = {
631 .id = id,
632 .addr = (uintptr_t) &val,
633 };
634 int ret;
635
636 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
637 if (ret != 0) {
638 trace_kvm_failed_spr_get(spr, strerror(errno));
639 } else {
640 switch (id & KVM_REG_SIZE_MASK) {
641 case KVM_REG_SIZE_U32:
642 env->spr[spr] = val.u32;
643 break;
644
645 case KVM_REG_SIZE_U64:
646 env->spr[spr] = val.u64;
647 break;
648
649 default:
650 /* Don't handle this size yet */
651 abort();
652 }
653 }
654 }
655
656 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
657 {
658 PowerPCCPU *cpu = POWERPC_CPU(cs);
659 CPUPPCState *env = &cpu->env;
660 union {
661 uint32_t u32;
662 uint64_t u64;
663 } val;
664 struct kvm_one_reg reg = {
665 .id = id,
666 .addr = (uintptr_t) &val,
667 };
668 int ret;
669
670 switch (id & KVM_REG_SIZE_MASK) {
671 case KVM_REG_SIZE_U32:
672 val.u32 = env->spr[spr];
673 break;
674
675 case KVM_REG_SIZE_U64:
676 val.u64 = env->spr[spr];
677 break;
678
679 default:
680 /* Don't handle this size yet */
681 abort();
682 }
683
684 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
685 if (ret != 0) {
686 trace_kvm_failed_spr_set(spr, strerror(errno));
687 }
688 }
689
/* Push guest FP/VSX and Altivec state from env into KVM.
 * Returns 0 on success or the negative error of the failing ioctl. */
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            /* KVM's VSR image is host-endian: the FPR half goes in the
             * first doubleword on BE hosts, the second on LE hosts */
#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            /* With VSX, each FPR is the top half of a full 128-bit VSR */
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
754
/* Fetch guest FP/VSX and Altivec state from KVM into env.
 * Returns 0 on success or the negative error of the failing ioctl. */
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            /* With VSX, each FPR is the top half of a full 128-bit VSR */
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                /* KVM's VSR image is host-endian: the FPR half is in the
                 * first doubleword on BE hosts, the second on LE hosts */
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
827
828 #if defined(TARGET_PPC64)
/* Read the sPAPR VPA, SLB shadow and dispatch trace log registration state
 * out of KVM into env.  Returns 0 or the negative error of a failed ioctl. */
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    /* KVM transfers addr+size as one 16-byte blob, so the two env fields
     * must be laid out adjacently */
    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
867
/* Push the sPAPR VPA, SLB shadow and dispatch trace log registration state
 * from env into KVM, in an order KVM will accept (see comment below).
 * Returns 0 or the negative error of a failed ioctl. */
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    /* KVM transfers addr+size as one 16-byte blob, so the two env fields
     * must be laid out adjacently */
    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    /* Deregistering the master VPA must come last (see assert above) */
    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
923 #endif /* TARGET_PPC64 */
924
/* Push the Book3S segment state (PVR, SDR1, SLB, SRs, BATs) into KVM.
 * Returns the KVM_SET_SREGS ioctl result. */
int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    sregs.u.s.sdr1 = env->spr[SPR_SDR1];

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        /* A valid entry carries its own index in the low ESID bits */
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}
962
/* Push the full QEMU CPU state into KVM.  'level' controls how much state
 * is written (e.g. sregs/HIOR only from KVM_PUT_RESET_STATE up).
 * Returns 0 or a negative error. */
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    /* Read-modify-write: start from KVM's current register block */
    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0;i < 32; i++)
        regs.gpr[i] = env->gpr[i];

    /* Reassemble the 32-bit CR image from the eight 4-bit CR fields */
    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0)
        return ret;

    kvm_put_fp(cs);

    /* Flush the shared software TLB if QEMU has touched it */
    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        /* Transactional-memory checkpoint state, pushed only while a
         * transaction is active or suspended (msr_ts non-zero) */
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}
1073
1074 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1075 {
1076 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1077 }
1078
/*
 * Read the BookE special-purpose register state from KVM into env.
 *
 * KVM reports which register groups it actually filled in through
 * sregs.u.e.features, so every block below is guarded by the matching
 * feature bit.  Returns 0 on success or the negative errno from the
 * KVM_GET_SREGS ioctl.
 */
static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    /* Base state: exception save/restore, timer control and timebase */
    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        /* The 64-bit timebase is split into the TBL/TBU halves */
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    /* Architecture 2.06 additions */
    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    /*
     * Interrupt vector offsets: each IVOR update also refreshes the
     * corresponding entry of env->excp_vectors via kvm_sync_excp().
     */
    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);

        /* SPE / embedded FP vectors */
        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
        }

        /* Performance monitor vector */
        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
        }

        /* Processor-control (doorbell) vectors */
        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
        }
    }

    /* Arch 2.06 MMU (MAS) registers; MAS3/MAS7 are packed into mas7_3 */
    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
    }

    /* External proxy register */
    if (sregs.u.e.features & KVM_SREGS_EXP) {
        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_PD) {
        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
    }

    /* Freescale implementation-specific registers */
    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        }
    }

    return 0;
}
1211
/*
 * Read the Book3S (server) MMU state from KVM into env: SDR1, the SLB
 * (64-bit targets only), the 16 segment registers and the 8 BAT pairs.
 * Returns 0 on success or the negative errno from KVM_GET_SREGS.
 */
static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    /* Only track SDR1 ourselves when the hash table is not external */
    if (!env->external_htab) {
        ppc_store_sdr1(env, sregs.u.s.sdr1);
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    /* Sync BATs: each BAT is packed as one 64-bit value, low word first */
    for (i = 0; i < 8; i++) {
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}
1264
1265 int kvm_arch_get_registers(CPUState *cs)
1266 {
1267 PowerPCCPU *cpu = POWERPC_CPU(cs);
1268 CPUPPCState *env = &cpu->env;
1269 struct kvm_regs regs;
1270 uint32_t cr;
1271 int i, ret;
1272
1273 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1274 if (ret < 0)
1275 return ret;
1276
1277 cr = regs.cr;
1278 for (i = 7; i >= 0; i--) {
1279 env->crf[i] = cr & 15;
1280 cr >>= 4;
1281 }
1282
1283 env->ctr = regs.ctr;
1284 env->lr = regs.lr;
1285 cpu_write_xer(env, regs.xer);
1286 env->msr = regs.msr;
1287 env->nip = regs.pc;
1288
1289 env->spr[SPR_SRR0] = regs.srr0;
1290 env->spr[SPR_SRR1] = regs.srr1;
1291
1292 env->spr[SPR_SPRG0] = regs.sprg0;
1293 env->spr[SPR_SPRG1] = regs.sprg1;
1294 env->spr[SPR_SPRG2] = regs.sprg2;
1295 env->spr[SPR_SPRG3] = regs.sprg3;
1296 env->spr[SPR_SPRG4] = regs.sprg4;
1297 env->spr[SPR_SPRG5] = regs.sprg5;
1298 env->spr[SPR_SPRG6] = regs.sprg6;
1299 env->spr[SPR_SPRG7] = regs.sprg7;
1300
1301 env->spr[SPR_BOOKE_PID] = regs.pid;
1302
1303 for (i = 0;i < 32; i++)
1304 env->gpr[i] = regs.gpr[i];
1305
1306 kvm_get_fp(cs);
1307
1308 if (cap_booke_sregs) {
1309 ret = kvmppc_get_booke_sregs(cpu);
1310 if (ret < 0) {
1311 return ret;
1312 }
1313 }
1314
1315 if (cap_segstate) {
1316 ret = kvmppc_get_books_sregs(cpu);
1317 if (ret < 0) {
1318 return ret;
1319 }
1320 }
1321
1322 if (cap_hior) {
1323 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1324 }
1325
1326 if (cap_one_reg) {
1327 int i;
1328
1329 /* We deliberately ignore errors here, for kernels which have
1330 * the ONE_REG calls, but don't support the specific
1331 * registers, there's a reasonable chance things will still
1332 * work, at least until we try to migrate. */
1333 for (i = 0; i < 1024; i++) {
1334 uint64_t id = env->spr_cb[i].one_reg_id;
1335
1336 if (id != 0) {
1337 kvm_get_one_spr(cs, id, i);
1338 }
1339 }
1340
1341 #ifdef TARGET_PPC64
1342 if (msr_ts) {
1343 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1344 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1345 }
1346 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1347 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1348 }
1349 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1350 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1351 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1352 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1353 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1354 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1357 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1359 }
1360
1361 if (cap_papr) {
1362 if (kvm_get_vpa(cs) < 0) {
1363 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1364 }
1365 }
1366
1367 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1368 #endif
1369 }
1370
1371 return 0;
1372 }
1373
1374 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1375 {
1376 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1377
1378 if (irq != PPC_INTERRUPT_EXT) {
1379 return 0;
1380 }
1381
1382 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1383 return 0;
1384 }
1385
1386 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1387
1388 return 0;
1389 }
1390
1391 #if defined(TARGET_PPCEMB)
1392 #define PPC_INPUT_INT PPC40x_INPUT_INT
1393 #elif defined(TARGET_PPC64)
1394 #define PPC_INPUT_INT PPC970_INPUT_INT
1395 #else
1396 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1397 #endif
1398
/*
 * Run-loop hook invoked just before entering the vcpu.
 *
 * On kernels without level-triggered interrupt support
 * (!cap_interrupt_level), QEMU must inject a pending external interrupt
 * manually whenever KVM signals it is ready to accept one.
 */
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    qemu_mutex_lock_iothread();

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (NANOSECONDS_PER_SECOND / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */

    qemu_mutex_unlock_iothread();
}
1438
/* PPC needs no post-run fixups; report unspecified memory attributes. */
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}
1443
/* No PPC-specific async events; just tell the loop whether the vcpu halted. */
int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
1448
1449 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1450 {
1451 CPUState *cs = CPU(cpu);
1452 CPUPPCState *env = &cpu->env;
1453
1454 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1455 cs->halted = 1;
1456 cs->exception_index = EXCP_HLT;
1457 }
1458
1459 return 0;
1460 }
1461
1462 /* map dcr access to existing qemu dcr emulation */
1463 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1464 {
1465 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1466 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1467
1468 return 0;
1469 }
1470
1471 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1472 {
1473 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1474 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1475
1476 return 0;
1477 }
1478
1479 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1480 {
1481 /* Mixed endian case is not handled */
1482 uint32_t sc = debug_inst_opcode;
1483
1484 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1485 sizeof(sc), 0) ||
1486 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1487 return -EINVAL;
1488 }
1489
1490 return 0;
1491 }
1492
1493 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1494 {
1495 uint32_t sc;
1496
1497 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1498 sc != debug_inst_opcode ||
1499 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1500 sizeof(sc), 1)) {
1501 return -EINVAL;
1502 }
1503
1504 return 0;
1505 }
1506
1507 static int find_hw_breakpoint(target_ulong addr, int type)
1508 {
1509 int n;
1510
1511 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1512 <= ARRAY_SIZE(hw_debug_points));
1513
1514 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1515 if (hw_debug_points[n].addr == addr &&
1516 hw_debug_points[n].type == type) {
1517 return n;
1518 }
1519 }
1520
1521 return -1;
1522 }
1523
1524 static int find_hw_watchpoint(target_ulong addr, int *flag)
1525 {
1526 int n;
1527
1528 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1529 if (n >= 0) {
1530 *flag = BP_MEM_ACCESS;
1531 return n;
1532 }
1533
1534 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1535 if (n >= 0) {
1536 *flag = BP_MEM_WRITE;
1537 return n;
1538 }
1539
1540 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1541 if (n >= 0) {
1542 *flag = BP_MEM_READ;
1543 return n;
1544 }
1545
1546 return -1;
1547 }
1548
1549 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1550 target_ulong len, int type)
1551 {
1552 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1553 return -ENOBUFS;
1554 }
1555
1556 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1557 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1558
1559 switch (type) {
1560 case GDB_BREAKPOINT_HW:
1561 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1562 return -ENOBUFS;
1563 }
1564
1565 if (find_hw_breakpoint(addr, type) >= 0) {
1566 return -EEXIST;
1567 }
1568
1569 nb_hw_breakpoint++;
1570 break;
1571
1572 case GDB_WATCHPOINT_WRITE:
1573 case GDB_WATCHPOINT_READ:
1574 case GDB_WATCHPOINT_ACCESS:
1575 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1576 return -ENOBUFS;
1577 }
1578
1579 if (find_hw_breakpoint(addr, type) >= 0) {
1580 return -EEXIST;
1581 }
1582
1583 nb_hw_watchpoint++;
1584 break;
1585
1586 default:
1587 return -ENOSYS;
1588 }
1589
1590 return 0;
1591 }
1592
1593 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1594 target_ulong len, int type)
1595 {
1596 int n;
1597
1598 n = find_hw_breakpoint(addr, type);
1599 if (n < 0) {
1600 return -ENOENT;
1601 }
1602
1603 switch (type) {
1604 case GDB_BREAKPOINT_HW:
1605 nb_hw_breakpoint--;
1606 break;
1607
1608 case GDB_WATCHPOINT_WRITE:
1609 case GDB_WATCHPOINT_READ:
1610 case GDB_WATCHPOINT_ACCESS:
1611 nb_hw_watchpoint--;
1612 break;
1613
1614 default:
1615 return -ENOSYS;
1616 }
1617 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1618
1619 return 0;
1620 }
1621
/* Drop all registered hardware breakpoints/watchpoints by zeroing the
 * counters; stale hw_debug_points entries become unreachable. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}
1626
/*
 * Build the kvm_guest_debug control block from QEMU's current software
 * and hardware breakpoint/watchpoint state before resuming the vcpu.
 */
void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        /* Translate each GDB debug-point type to the KVMPPC equivalent */
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                /* Access watchpoints are encoded as read|write */
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}
1665
/*
 * Handle a KVM_EXIT_DEBUG exit.
 *
 * Returns non-zero when the event belongs to QEMU (single-stepping, or
 * one of our software/hardware debug points) so the caller forwards
 * EXCP_DEBUG to the gdbstub; returns 0 when the exit was guest-internal
 * (a program interrupt is injected and the guest resumes).
 */
static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        /* Hardware debug event: match it against our registered points */
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address,  &flag);
                if (n >= 0) {
                    /* Record the hit so gdbstub can report the watchpoint */
                    handle = 1;
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle debug exception, so inject
         * program exception to guest;
         * Yes program exception NOT debug exception !!
         * When QEMU is using debug resources then debug exception must
         * be always set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so guest cannot change MSR_DE.
         * When emulating debug resource for guest we want guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations are NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and Guest on BOOKE architecture.
         * In the current design QEMU gets the priority over guest,
         * this means that if QEMU is using debug resources then guest
         * cannot use them;
         * For software breakpoint QEMU uses a privileged instruction;
         * So there cannot be any reason that we are here for guest
         * set debug exception, only possibility is guest executed a
         * privileged / illegal instruction and that's why we are
         * injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}
1731
/*
 * Main KVM exit dispatcher for PPC.  Runs with the iothread lock held
 * for the whole handler.  Returns 0 to re-enter the guest, EXCP_DEBUG
 * to hand control to the gdbstub, or a negative value on an unknown
 * exit reason.
 */
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        /* Device control register access from the guest */
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        /* sPAPR hypercall emulated in QEMU; result goes back via the run
         * structure */
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        /* Supply the external proxy register value from the MPIC IACK */
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}
1793
1794 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1795 {
1796 CPUState *cs = CPU(cpu);
1797 uint32_t bits = tsr_bits;
1798 struct kvm_one_reg reg = {
1799 .id = KVM_REG_PPC_OR_TSR,
1800 .addr = (uintptr_t) &bits,
1801 };
1802
1803 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1804 }
1805
1806 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1807 {
1808
1809 CPUState *cs = CPU(cpu);
1810 uint32_t bits = tsr_bits;
1811 struct kvm_one_reg reg = {
1812 .id = KVM_REG_PPC_CLEAR_TSR,
1813 .addr = (uintptr_t) &bits,
1814 };
1815
1816 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1817 }
1818
1819 int kvmppc_set_tcr(PowerPCCPU *cpu)
1820 {
1821 CPUState *cs = CPU(cpu);
1822 CPUPPCState *env = &cpu->env;
1823 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1824
1825 struct kvm_one_reg reg = {
1826 .id = KVM_REG_PPC_TCR,
1827 .addr = (uintptr_t) &tcr,
1828 };
1829
1830 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1831 }
1832
1833 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1834 {
1835 CPUState *cs = CPU(cpu);
1836 int ret;
1837
1838 if (!kvm_enabled()) {
1839 return -1;
1840 }
1841
1842 if (!cap_ppc_watchdog) {
1843 printf("warning: KVM does not support watchdog");
1844 return -1;
1845 }
1846
1847 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1848 if (ret < 0) {
1849 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1850 __func__, strerror(-ret));
1851 return ret;
1852 }
1853
1854 return ret;
1855 }
1856
/*
 * Scan /proc/cpuinfo for the first line starting with @field and copy
 * that whole line (truncated to @len) into @value.
 * Returns 0 when found, -1 when the file is missing or has no such line.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int found = -1;
    size_t field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(f);

    return found;
}
1884
1885 uint32_t kvmppc_get_tbfreq(void)
1886 {
1887 char line[512];
1888 char *ns;
1889 uint32_t retval = NANOSECONDS_PER_SECOND;
1890
1891 if (read_cpuinfo("timebase", line, sizeof(line))) {
1892 return retval;
1893 }
1894
1895 if (!(ns = strchr(line, ':'))) {
1896 return retval;
1897 }
1898
1899 ns++;
1900
1901 return atoi(ns);
1902 }
1903
/* Read the host's system-id device tree property into *value (allocated
 * by glib; caller releases with g_free).  Returns true on success. */
bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}
1909
/* Read the host's model device tree property into *value (allocated by
 * glib; caller releases with g_free).  Returns true on success. */
bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}
1914
1915 /* Try to find a device tree node for a CPU with clock-frequency property */
1916 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1917 {
1918 struct dirent *dirp;
1919 DIR *dp;
1920
1921 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1922 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1923 return -1;
1924 }
1925
1926 buf[0] = '\0';
1927 while ((dirp = readdir(dp)) != NULL) {
1928 FILE *f;
1929 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1930 dirp->d_name);
1931 f = fopen(buf, "r");
1932 if (f) {
1933 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1934 fclose(f);
1935 break;
1936 }
1937 buf[0] = '\0';
1938 }
1939 closedir(dp);
1940 if (buf[0] == '\0') {
1941 printf("Unknown host!\n");
1942 return -1;
1943 }
1944
1945 return 0;
1946 }
1947
/*
 * Read a big-endian integer device tree property file.
 * Returns the value converted to host endianness, (uint64_t)-1 when the
 * file cannot be opened, or 0 when the property size is not 4 or 8 bytes.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } val;
    FILE *f;
    size_t nread;

    f = fopen(filename, "rb");
    if (!f) {
        return -1;
    }

    nread = fread(&val, 1, sizeof(val), f);
    fclose(f);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(val.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(val.v64);
    }

    return 0;
}
1974
1975 /* Read a CPU node property from the host device tree that's a single
1976 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1977 * (can't find or open the property, or doesn't understand the
1978 * format) */
1979 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1980 {
1981 char buf[PATH_MAX], *tmp;
1982 uint64_t val;
1983
1984 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1985 return -1;
1986 }
1987
1988 tmp = g_strdup_printf("%s/%s", buf, propname);
1989 val = kvmppc_read_int_dt(tmp);
1990 g_free(tmp);
1991
1992 return val;
1993 }
1994
/* Host CPU clock frequency from the device tree (0 or -1 on failure,
 * per kvmppc_read_int_cpu_dt()). */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
1999
/* Host "ibm,vmx" device tree property (VMX/VSX capability level). */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
2004
/* Host "ibm,dfp" device tree property (decimal FP capability). */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
2009
2010 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2011 {
2012 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2013 CPUState *cs = CPU(cpu);
2014
2015 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2016 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2017 return 0;
2018 }
2019
2020 return 1;
2021 }
2022
2023 int kvmppc_get_hasidle(CPUPPCState *env)
2024 {
2025 struct kvm_ppc_pvinfo pvinfo;
2026
2027 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2028 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2029 return 1;
2030 }
2031
2032 return 0;
2033 }
2034
/*
 * Fill @buf with the instruction sequence the guest should use to make a
 * hypercall.  Uses the kernel-provided sequence when available, otherwise
 * a fallback that always fails the hypercall.  Returns 0 when the kernel
 * sequence was used, 1 for the fallback.
 *
 * NOTE(review): the fallback path writes four 32-bit words (16 bytes)
 * regardless of @buf_len — callers are assumed to pass buf_len >= 16;
 * confirm against callers.
 */
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     * li r3, -1
     * b .+8 (becomes nop in wrong endian)
     * bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 1;
}
2061
/* Ask the kernel to handle hypercall @hcall in-kernel
 * (KVM_CAP_PPC_ENABLE_HCALL).  Returns the enable-cap result. */
static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}
2066
/* Enable in-kernel handling of the logical cache-inhibited load/store
 * hypercalls; failures are silently ignored (best effort). */
void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}
2078
/* Enable in-kernel handling of H_SET_MODE (best effort, result ignored). */
void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}
2083
/* Enable in-kernel handling of H_CLEAR_REF / H_CLEAR_MOD (best effort). */
void kvmppc_enable_clear_ref_mod_hcalls(void)
{
    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}
2089
2090 void kvmppc_set_papr(PowerPCCPU *cpu)
2091 {
2092 CPUState *cs = CPU(cpu);
2093 int ret;
2094
2095 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2096 if (ret) {
2097 error_report("This vCPU type or KVM version does not support PAPR");
2098 exit(1);
2099 }
2100
2101 /* Update the capability flag so we sync the right information
2102 * with kvm */
2103 cap_papr = 1;
2104 }
2105
/* Ask KVM to run the vcpu in compatibility mode @cpu_version via the
 * ARCH_COMPAT ONE_REG.  Returns the ioctl result. */
int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
}
2110
2111 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2112 {
2113 CPUState *cs = CPU(cpu);
2114 int ret;
2115
2116 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2117 if (ret && mpic_proxy) {
2118 error_report("This KVM version does not support EPR");
2119 exit(1);
2120 }
2121 }
2122
2123 int kvmppc_smt_threads(void)
2124 {
2125 return cap_ppc_smt ? cap_ppc_smt : 1;
2126 }
2127
2128 #ifdef TARGET_PPC64
2129 off_t kvmppc_alloc_rma(void **rma)
2130 {
2131 off_t size;
2132 int fd;
2133 struct kvm_allocate_rma ret;
2134
2135 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2136 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2137 * not necessary on this hardware
2138 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2139 *
2140 * FIXME: We should allow the user to force contiguous RMA
2141 * allocation in the cap_ppc_rma==1 case.
2142 */
2143 if (cap_ppc_rma < 2) {
2144 return 0;
2145 }
2146
2147 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2148 if (fd < 0) {
2149 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2150 strerror(errno));
2151 return -1;
2152 }
2153
2154 size = MIN(ret.rma_size, 256ul << 20);
2155
2156 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2157 if (*rma == MAP_FAILED) {
2158 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2159 return -1;
2160 };
2161
2162 return size;
2163 }
2164
/*
 * Clamp the requested RMA size to what the host MMU configuration can
 * support, based on the largest host page size usable for guest RAM and
 * the hash table size.  When KVM itself allocated the RMA
 * (cap_ppc_rma >= 2), the current size is already authoritative.
 */
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        /* Unused slots in the page-size table have page_shift == 0 */
        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    /* NOTE(review): the "- 7" factor presumably derives from the
     * architected RMA-size encoding — confirm against the ISA. */
    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
2197 #endif
2198
/* Whether the kernel advertises the sPAPR multi-TCE capability. */
bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}
2203
/*
 * Create an in-kernel TCE table for @liobn and mmap it into QEMU.
 *
 * On success, returns the mapped table and stores the backing fd in
 * *pfd.  Returns NULL (with *pfd == -1) when the capability is missing,
 * when VFIO support is required but absent, or on ioctl/mmap failure.
 */
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
                              bool need_vfio)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    /* One 64-bit TCE entry per IOMMU page in the window */
    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
2244
/*
 * Tear down a TCE table created by kvmppc_create_spapr_tce(): unmap the
 * table and close the backing fd.  A negative fd means the table was
 * never kernel-backed, so -1 is returned and nothing is done.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    size_t bytes;

    if (fd < 0) {
        return -1;
    }

    bytes = nb_table * sizeof(uint64_t);
    if (munmap(table, bytes) < 0 || close(fd) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2263
/*
 * Ask the kernel to allocate (or reset) the hashed page table.
 *
 * Returns the htab shift actually granted by the kernel, 0 when QEMU
 * must allocate the htab itself (TCG, PR KVM, or broken PR kernels that
 * advertise the capability without implementing it), 24 for old HV
 * kernels with a fixed 16MB htab, or a negative errno on failure.
 */
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl(). Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls. For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}
2298
/* Read the host Processor Version Register via the mfpvr instruction. */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
2307
/* Set (@on true) or clear (@on false) the @flags bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2316
/* Instance init for the "host" CPU type; only valid when KVM is on. */
static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}
2321
/*
 * Class init for the "host" CPU model: patch the CPU class template
 * with values queried from the host (PVR, Altivec/VSX/DFP support,
 * L1 cache sizes from the device tree).
 */
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    /* NOTE(review): the checks below treat (uint32_t)-1 as "unknown" */
    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
    dc->cannot_destroy_with_object_finalize_yet = true;
}
2355
2356 bool kvmppc_has_cap_epr(void)
2357 {
2358 return cap_epr;
2359 }
2360
2361 bool kvmppc_has_cap_htab_fd(void)
2362 {
2363 return cap_htab_fd;
2364 }
2365
2366 bool kvmppc_has_cap_fixup_hcalls(void)
2367 {
2368 return cap_fixup_hcalls;
2369 }
2370
2371 bool kvmppc_has_cap_htm(void)
2372 {
2373 return cap_htm;
2374 }
2375
2376 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2377 {
2378 ObjectClass *oc = OBJECT_CLASS(pcc);
2379
2380 while (oc && !object_class_is_abstract(oc)) {
2381 oc = object_class_get_parent(oc);
2382 }
2383 assert(oc);
2384
2385 return POWERPC_CPU_CLASS(oc);
2386 }
2387
2388 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2389 {
2390 uint32_t host_pvr = mfpvr();
2391 PowerPCCPUClass *pvr_pcc;
2392
2393 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2394 if (pvr_pcc == NULL) {
2395 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2396 }
2397
2398 return pvr_pcc;
2399 }
2400
2401 static int kvm_ppc_register_host_cpu_type(void)
2402 {
2403 TypeInfo type_info = {
2404 .name = TYPE_HOST_POWERPC_CPU,
2405 .instance_init = kvmppc_host_cpu_initfn,
2406 .class_init = kvmppc_host_cpu_class_init,
2407 };
2408 PowerPCCPUClass *pvr_pcc;
2409 DeviceClass *dc;
2410
2411 pvr_pcc = kvm_ppc_get_host_cpu_class();
2412 if (pvr_pcc == NULL) {
2413 return -1;
2414 }
2415 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2416 type_register(&type_info);
2417
2418 /* Register generic family CPU class for a family */
2419 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2420 dc = DEVICE_CLASS(pvr_pcc);
2421 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2422 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2423 type_register(&type_info);
2424
2425 #if defined(TARGET_PPC64)
2426 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2427 type_info.parent = TYPE_SPAPR_CPU_CORE,
2428 type_info.instance_size = sizeof(sPAPRCPUCore);
2429 type_info.instance_init = NULL;
2430 type_info.class_init = spapr_cpu_core_class_init;
2431 type_info.class_data = (void *) "host";
2432 type_register(&type_info);
2433 g_free((void *)type_info.name);
2434
2435 /* Register generic spapr CPU family class for current host CPU type */
2436 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2437 type_info.class_data = (void *) dc->desc;
2438 type_register(&type_info);
2439 g_free((void *)type_info.name);
2440 #endif
2441
2442 return 0;
2443 }
2444
2445 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2446 {
2447 struct kvm_rtas_token_args args = {
2448 .token = token,
2449 };
2450
2451 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2452 return -ENOENT;
2453 }
2454
2455 strncpy(args.name, function, sizeof(args.name));
2456
2457 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2458 }
2459
2460 int kvmppc_get_htab_fd(bool write)
2461 {
2462 struct kvm_get_htab_fd s = {
2463 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2464 .start_index = 0,
2465 };
2466
2467 if (!cap_htab_fd) {
2468 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2469 return -1;
2470 }
2471
2472 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2473 }
2474
/*
 * Drain hash table chunks from the kernel HTAB @fd into the migration
 * stream @f, stopping when read() returns 0 (table fully sent) or the
 * @max_ns time budget expires (max_ns < 0 means unlimited).
 *
 * Returns 1 when the table was fully drained, 0 when stopped on the
 * time budget, or the negative read() result on error.
 */
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];  /* NOTE(review): VLA — bufsize trusted from caller */
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            /* each read() holds one or more variable-sized chunks:
             * a header followed by n_valid HPTEs */
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                /* re-emit the header fields big-endian for the stream */
                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
2512
2513 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2514 uint16_t n_valid, uint16_t n_invalid)
2515 {
2516 struct kvm_get_htab_header *buf;
2517 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2518 ssize_t rc;
2519
2520 buf = alloca(chunksize);
2521 buf->index = index;
2522 buf->n_valid = n_valid;
2523 buf->n_invalid = n_invalid;
2524
2525 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2526
2527 rc = write(fd, buf, chunksize);
2528 if (rc < 0) {
2529 fprintf(stderr, "Error writing KVM hash table: %s\n",
2530 strerror(errno));
2531 return rc;
2532 }
2533 if (rc != chunksize) {
2534 /* We should never get a short write on a single chunk */
2535 fprintf(stderr, "Short write, restoring KVM hash table\n");
2536 return -1;
2537 }
2538 return 0;
2539 }
2540
/* Always stop the guest when KVM reports an emulation failure. */
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}
2545
/* No per-vCPU SIGBUS handling on PPC; always returns 1
 * (NOTE(review): presumably "not handled" — confirm against kvm-all.c). */
int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}
2550
/* No process-wide SIGBUS handling on PPC; always returns 1
 * (NOTE(review): presumably "not handled" — confirm against kvm-all.c). */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
2555
/* No architecture-specific IRQ routing setup is needed on PPC. */
void kvm_arch_init_irq_routing(KVMState *s)
{
}
2559
/*
 * Buffer for exchanging one HPTE group with the kernel HTAB fd:
 * the stream header followed by the PTE words.
 */
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
2567
/*
 * Read the HPTE group containing @pte_index from the kernel via a
 * fresh HTAB fd.
 *
 * Returns an opaque token — actually the address of the hpte[] data
 * inside a heap-allocated kvm_get_htab_buf — which the caller MUST
 * release with kvmppc_hash64_free_pteg().  Returns 0 on failure.
 */
uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;                /* read-only stream */
    ghf.start_index = pte_index;  /* begin at the requested PTE */
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        goto out_close;
    }

    close(htab_fd);
    return (uint64_t)(uintptr_t) hpte_buf->hpte;

out_close:
    g_free(hpte_buf);
    close(htab_fd);
error_out:
    return 0;
}
2598
2599 void kvmppc_hash64_free_pteg(uint64_t token)
2600 {
2601 struct kvm_get_htab_buf *htab_buf;
2602
2603 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2604 hpte);
2605 g_free(htab_buf);
2606 return;
2607 }
2608
/*
 * Write a single HPTE (@pte0/@pte1 at @pte_index) into the guest hash
 * table through a fresh kernel HTAB fd.  All errors are deliberately
 * swallowed — the void return offers no way to report them.
 */
void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
                             target_ulong pte0, target_ulong pte1)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf hpte_buf;

    ghf.flags = 0;
    ghf.start_index = 0;     /* Ignored */
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    /* one chunk: a single valid entry, no invalidations */
    hpte_buf.header.n_valid = 1;
    hpte_buf.header.n_invalid = 0;
    hpte_buf.header.index = pte_index;
    hpte_buf.hpte[0] = pte0;
    hpte_buf.hpte[1] = pte1;
    /*
     * Write the hpte entry.
     * CAUTION: write() has the warn_unused_result attribute. Hence we
     * need to check the return value, even though we do nothing.
     */
    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
        goto out_close;
    }

out_close:
    close(htab_fd);
    return;

error_out:
    return;
}
2644
/* MSI routes need no PPC-specific fixup; accept them unchanged. */
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}
2650
/* No post-processing after adding an MSI route on PPC; report success. */
int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}
2656
/* No cleanup needed after releasing a virq on PPC; report success. */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
2661
/* Extract the GSI number carried in the low 16 bits of MSI data. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffffu;
}
2666
2667 int kvmppc_enable_hwrng(void)
2668 {
2669 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2670 return -1;
2671 }
2672
2673 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2674 }