WHPX: Use QEMU values for trapped CPUID
[qemu.git] / target / i386 / whpx-all.c
1 /*
2 * QEMU Windows Hypervisor Platform accelerator (WHPX)
3 *
4 * Copyright Microsoft Corp. 2017
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11 #include "qemu/osdep.h"
12 #include "cpu.h"
13 #include "exec/address-spaces.h"
14 #include "exec/ioport.h"
15 #include "qemu-common.h"
16 #include "sysemu/accel.h"
17 #include "sysemu/whpx.h"
18 #include "sysemu/cpus.h"
19 #include "sysemu/runstate.h"
20 #include "qemu/main-loop.h"
21 #include "hw/boards.h"
22 #include "qemu/error-report.h"
23 #include "qapi/error.h"
24 #include "migration/blocker.h"
25 #include "whp-dispatch.h"
26
27 #include <WinHvPlatform.h>
28 #include <WinHvEmulation.h>
29
/*
 * Global accelerator state: the memory quota and the single WHPX
 * partition handle shared by all vCPUs (see whpx_global below).
 */
struct whpx_state {
    uint64_t mem_quota;
    WHV_PARTITION_HANDLE partition;
};
34
/*
 * The full set of registers transferred between QEMU and the hypervisor
 * on each whpx_set_registers()/whpx_get_registers() call.
 *
 * NOTE: the order of entries is a contract. The marshaling code walks
 * this array with a running index and asserts that specific registers
 * appear at specific positions (e.g. the first 16 GPRs must line up with
 * QEMU's env->regs[] indices). Do not reorder without updating both
 * whpx_set_registers() and whpx_get_registers().
 */
static const WHV_REGISTER_NAME whpx_register_names[] = {

    /* X64 General purpose registers */
    WHvX64RegisterRax,
    WHvX64RegisterRcx,
    WHvX64RegisterRdx,
    WHvX64RegisterRbx,
    WHvX64RegisterRsp,
    WHvX64RegisterRbp,
    WHvX64RegisterRsi,
    WHvX64RegisterRdi,
    WHvX64RegisterR8,
    WHvX64RegisterR9,
    WHvX64RegisterR10,
    WHvX64RegisterR11,
    WHvX64RegisterR12,
    WHvX64RegisterR13,
    WHvX64RegisterR14,
    WHvX64RegisterR15,
    WHvX64RegisterRip,
    WHvX64RegisterRflags,

    /* X64 Segment registers */
    WHvX64RegisterEs,
    WHvX64RegisterCs,
    WHvX64RegisterSs,
    WHvX64RegisterDs,
    WHvX64RegisterFs,
    WHvX64RegisterGs,
    WHvX64RegisterLdtr,
    WHvX64RegisterTr,

    /* X64 Table registers */
    WHvX64RegisterIdtr,
    WHvX64RegisterGdtr,

    /* X64 Control Registers */
    WHvX64RegisterCr0,
    WHvX64RegisterCr2,
    WHvX64RegisterCr3,
    WHvX64RegisterCr4,
    WHvX64RegisterCr8,

    /* X64 Debug Registers - intentionally not synced */
    /*
     * WHvX64RegisterDr0,
     * WHvX64RegisterDr1,
     * WHvX64RegisterDr2,
     * WHvX64RegisterDr3,
     * WHvX64RegisterDr6,
     * WHvX64RegisterDr7,
     */

    /* X64 Floating Point and Vector Registers */
    WHvX64RegisterXmm0,
    WHvX64RegisterXmm1,
    WHvX64RegisterXmm2,
    WHvX64RegisterXmm3,
    WHvX64RegisterXmm4,
    WHvX64RegisterXmm5,
    WHvX64RegisterXmm6,
    WHvX64RegisterXmm7,
    WHvX64RegisterXmm8,
    WHvX64RegisterXmm9,
    WHvX64RegisterXmm10,
    WHvX64RegisterXmm11,
    WHvX64RegisterXmm12,
    WHvX64RegisterXmm13,
    WHvX64RegisterXmm14,
    WHvX64RegisterXmm15,
    WHvX64RegisterFpMmx0,
    WHvX64RegisterFpMmx1,
    WHvX64RegisterFpMmx2,
    WHvX64RegisterFpMmx3,
    WHvX64RegisterFpMmx4,
    WHvX64RegisterFpMmx5,
    WHvX64RegisterFpMmx6,
    WHvX64RegisterFpMmx7,
    WHvX64RegisterFpControlStatus,
    WHvX64RegisterXmmControlStatus,

    /* X64 MSRs */
    WHvX64RegisterEfer,
#ifdef TARGET_X86_64
    WHvX64RegisterKernelGsBase,
#endif
    WHvX64RegisterApicBase,
    /* WHvX64RegisterPat, */
    WHvX64RegisterSysenterCs,
    WHvX64RegisterSysenterEip,
    WHvX64RegisterSysenterEsp,
    WHvX64RegisterStar,
#ifdef TARGET_X86_64
    WHvX64RegisterLstar,
    WHvX64RegisterCstar,
    WHvX64RegisterSfmask,
#endif

    /* Interrupt / Event Registers - handled separately in pre_run */
    /*
     * WHvRegisterPendingInterruption,
     * WHvRegisterInterruptState,
     * WHvRegisterPendingEvent0,
     * WHvRegisterPendingEvent1
     * WHvX64RegisterDeliverabilityNotifications,
     */
};
142
/*
 * Value buffer parallel to whpx_register_names[]: values[i] holds the
 * contents of whpx_register_names[i] during a bulk get/set.
 */
struct whpx_register_set {
    WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
};
146
/*
 * Per-vCPU WHPX state, stored behind CPUState.hax_vcpu (see
 * get_whpx_vcpu()).
 */
struct whpx_vcpu {
    WHV_EMULATOR_HANDLE emulator;       /* instruction emulator instance */
    bool window_registered;             /* interrupt-window exit requested */
    bool interruptable;                 /* false while in interrupt shadow */
    uint64_t tpr;                       /* last TPR synced with the APIC */
    uint64_t apic_base;                 /* last APIC base synced */
    bool interruption_pending;          /* event injection still pending */

    /* Must be the last field as it may have a tail */
    WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
};
158
/* Whether the WHPX accelerator was selected and initialized. */
static bool whpx_allowed;
/* Set once the whp_dispatch function table has been resolved. */
static bool whp_dispatch_initialized;
/* Lazily loaded WinHv platform/emulation DLL handles. */
static HMODULE hWinHvPlatform, hWinHvEmulation;

struct whpx_state whpx_global;
struct WHPDispatch whp_dispatch;
165
166
167 /*
168 * VP support
169 */
170
171 static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu)
172 {
173 return (struct whpx_vcpu *)cpu->hax_vcpu;
174 }
175
/*
 * Convert a QEMU SegmentCache into the hypervisor's segment register
 * layout.
 *
 * @qs:  source segment state
 * @v86: non-zero when the guest is in virtual-8086 mode; the segment
 *       attributes are then synthesized (writable data, DPL 3, present)
 *       rather than taken from @qs
 * @r86: non-zero when the guest is in real mode (CR0.PE clear)
 */
static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86,
                                             int r86)
{
    WHV_X64_SEGMENT_REGISTER hs;
    unsigned flags = qs->flags;

    hs.Base = qs->base;
    hs.Limit = qs->limit;
    hs.Selector = qs->selector;

    if (v86) {
        hs.Attributes = 0;
        hs.SegmentType = 3;
        hs.Present = 1;
        hs.DescriptorPrivilegeLevel = 3;
        hs.NonSystemSegment = 1;

    } else {
        /* QEMU keeps the attribute bits shifted up in flags */
        hs.Attributes = (flags >> DESC_TYPE_SHIFT);

        if (r86) {
            /* hs.Base &= 0xfffff; */
        }
    }

    return hs;
}
203
204 static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
205 {
206 SegmentCache qs;
207
208 qs.base = hs->Base;
209 qs.limit = hs->Limit;
210 qs.selector = hs->Selector;
211
212 qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT;
213
214 return qs;
215 }
216
/*
 * Push env->tsc into the vCPU's TSC register.
 *
 * Returns 0 on success, -1 if the hypervisor rejected the write.
 */
static int whpx_set_tsc(CPUState *cpu)
{
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
    WHV_REGISTER_VALUE tsc_val;
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;

    /*
     * Suspend the partition prior to setting the TSC to reduce the variance
     * in TSC across vCPUs. When the first vCPU runs post suspend, the
     * partition is automatically resumed.
     */
    if (whp_dispatch.WHvSuspendPartitionTime) {

        /*
         * Unable to suspend partition while setting TSC is not a fatal
         * error. It just increases the likelihood of TSC variance between
         * vCPUs and some guest OS are able to handle that just fine.
         */
        hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
        if (FAILED(hr)) {
            warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
        }
    }

    tsc_val.Reg64 = env->tsc;
    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
        return -1;
    }

    return 0;
}
253
/*
 * Push the full QEMU CPU state into the hypervisor vCPU.
 *
 * @level selects how much state to transfer: at WHPX_SET_RESET_STATE or
 * above, heavyweight/side-effecting MSRs (currently the TSC) are also
 * written.
 *
 * The function fills a whpx_register_set in exactly the order of
 * whpx_register_names[] (the asserts verify the running index stays in
 * sync) and commits everything with a single
 * WHvSetVirtualProcessorRegisters call.
 */
static void whpx_set_registers(CPUState *cpu, int level)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;
    int v86, r86;

    /* Only safe when the vCPU isn't concurrently running */
    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    /*
     * Following MSRs have side effects on the guest or are too heavy for
     * runtime. Limit them to full state update.
     */
    if (level >= WHPX_SET_RESET_STATE) {
        whpx_set_tsc(cpu);
    }

    memset(&vcxt, 0, sizeof(struct whpx_register_set));

    /* Mode flags feed segment attribute synthesis in whpx_seg_q2h() */
    v86 = (env->eflags & VM_MASK);
    r86 = !(env->cr[0] & CR0_PE_MASK);

    vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    vcxt.values[idx++].Reg64 = env->eip;

    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    vcxt.values[idx++].Reg64 = env->eflags;

    /* Translate 6+4 segment registers. HV and QEMU order matches */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
    }

    assert(idx == WHvX64RegisterLdtr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);

    assert(idx == WHvX64RegisterTr);
    vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);

    assert(idx == WHvX64RegisterIdtr);
    vcxt.values[idx].Table.Base = env->idt.base;
    vcxt.values[idx].Table.Limit = env->idt.limit;
    idx += 1;

    assert(idx == WHvX64RegisterGdtr);
    vcxt.values[idx].Table.Base = env->gdt.base;
    vcxt.values[idx].Table.Limit = env->gdt.limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    vcxt.values[idx++].Reg64 = env->cr[0];
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    vcxt.values[idx++].Reg64 = env->cr[2];
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    vcxt.values[idx++].Reg64 = env->cr[3];
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    vcxt.values[idx++].Reg64 = env->cr[4];
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    /* CR8 mirrors the APIC task-priority register */
    vcxt.values[idx++].Reg64 = vcpu->tpr;

    /* 8 Debug Registers - Skipped */

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
        vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
        /* vcxt.values[idx].Fp.AsUINT128.High64 =
               env->fpregs[i].mmx.MMX_Q(1);
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
    /* Merge the FPU top-of-stack (fpstt) back into status-word bits 11-13 */
    vcxt.values[idx].FpControlStatus.FpStatus =
        (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    vcxt.values[idx].FpControlStatus.FpTag = 0;
    for (i = 0; i < 8; ++i) {
        /* QEMU fptags uses inverted sense vs. the abridged tag byte */
        vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
    }
    vcxt.values[idx].FpControlStatus.Reserved = 0;
    vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
    vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
    vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
    vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    vcxt.values[idx++].Reg64 = env->efer;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    vcxt.values[idx++].Reg64 = env->kernelgsbase;
#endif

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    vcxt.values[idx++].Reg64 = vcpu->apic_base;

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    vcxt.values[idx++].Reg64 = env->sysenter_cs;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    vcxt.values[idx++].Reg64 = env->sysenter_eip;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    vcxt.values[idx++].Reg64 = env->sysenter_esp;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    vcxt.values[idx++].Reg64 = env->star;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    vcxt.values[idx++].Reg64 = env->lstar;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    vcxt.values[idx++].Reg64 = env->cstar;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    vcxt.values[idx++].Reg64 = env->fmask;
#endif

    /* Interrupt / Event Registers - Skipped */

    /* Every named register must have been filled exactly once */
    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
                     hr);
    }

    return;
}
423
424 static int whpx_get_tsc(CPUState *cpu)
425 {
426 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
427 WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
428 WHV_REGISTER_VALUE tsc_val;
429 HRESULT hr;
430 struct whpx_state *whpx = &whpx_global;
431
432 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
433 whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
434 if (FAILED(hr)) {
435 error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
436 return -1;
437 }
438
439 env->tsc = tsc_val.Reg64;
440 return 0;
441 }
442
/*
 * Pull the full vCPU state from the hypervisor back into QEMU's
 * CPUX86State. Mirror of whpx_set_registers(): registers are unpacked in
 * the exact order of whpx_register_names[], with asserts keeping the
 * running index honest. TPR and APIC base changes are forwarded to the
 * APIC model.
 */
static void whpx_get_registers(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_register_set vcxt;
    uint64_t tpr, apic_base;
    HRESULT hr;
    int idx;
    int idx_next;
    int i;

    /* Only safe when the vCPU isn't concurrently running */
    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    if (!env->tsc_valid) {
        whpx_get_tsc(cpu);
        /* While stopped, the cached TSC stays valid until the VM resumes */
        env->tsc_valid = !runstate_is_running();
    }

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        whpx_register_names,
        RTL_NUMBER_OF(whpx_register_names),
        &vcxt.values[0]);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
                     hr);
    }

    idx = 0;

    /* Indexes for first 16 registers match between HV and QEMU definitions */
    idx_next = 16;
    for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
        env->regs[idx] = vcxt.values[idx].Reg64;
    }
    idx = idx_next;

    /* Same goes for RIP and RFLAGS */
    assert(whpx_register_names[idx] == WHvX64RegisterRip);
    env->eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterRflags);
    env->eflags = vcxt.values[idx++].Reg64;

    /* Translate 6+4 segment registers. HV and QEMU order matches */
    assert(idx == WHvX64RegisterEs);
    for (i = 0; i < 6; i += 1, idx += 1) {
        env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
    }

    assert(idx == WHvX64RegisterLdtr);
    env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterTr);
    env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
    assert(idx == WHvX64RegisterIdtr);
    env->idt.base = vcxt.values[idx].Table.Base;
    env->idt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;
    assert(idx == WHvX64RegisterGdtr);
    env->gdt.base = vcxt.values[idx].Table.Base;
    env->gdt.limit = vcxt.values[idx].Table.Limit;
    idx += 1;

    /* CR0, 2, 3, 4, 8 */
    assert(whpx_register_names[idx] == WHvX64RegisterCr0);
    env->cr[0] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr2);
    env->cr[2] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr3);
    env->cr[3] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr4);
    env->cr[4] = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCr8);
    /* Propagate a guest-modified CR8 (TPR) to the APIC model */
    tpr = vcxt.values[idx++].Reg64;
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
    }

    /* 8 Debug Registers - Skipped */

    /* 16 XMM registers */
    assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
    idx_next = idx + 16;
    for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
        env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
        env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
    }
    idx = idx_next;

    /* 8 FP registers */
    assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
    for (i = 0; i < 8; i += 1, idx += 1) {
        env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
        /* env->fpregs[i].mmx.MMX_Q(1) =
               vcxt.values[idx].Fp.AsUINT128.High64;
        */
    }

    /* FP control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
    env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
    /* Split the status word back into top-of-stack (bits 11-13) and rest */
    env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
    env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
    for (i = 0; i < 8; ++i) {
        /* QEMU fptags uses inverted sense vs. the abridged tag byte */
        env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
    }
    env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
    env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
    idx += 1;

    /* XMM control status register */
    assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
    env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
    idx += 1;

    /* MSRs */
    assert(whpx_register_names[idx] == WHvX64RegisterEfer);
    env->efer = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
    env->kernelgsbase = vcxt.values[idx++].Reg64;
#endif

    assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
    /* Propagate a changed APIC base to the APIC model */
    apic_base = vcxt.values[idx++].Reg64;
    if (apic_base != vcpu->apic_base) {
        vcpu->apic_base = apic_base;
        cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
    }

    /* WHvX64RegisterPat - Skipped */

    assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
    env->sysenter_cs = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
    env->sysenter_eip = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
    env->sysenter_esp = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterStar);
    env->star = vcxt.values[idx++].Reg64;
#ifdef TARGET_X86_64
    assert(whpx_register_names[idx] == WHvX64RegisterLstar);
    env->lstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterCstar);
    env->cstar = vcxt.values[idx++].Reg64;
    assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
    env->fmask = vcxt.values[idx++].Reg64;
#endif

    /* Interrupt / Event Registers - Skipped */

    /* Every named register must have been consumed exactly once */
    assert(idx == RTL_NUMBER_OF(whpx_register_names));

    return;
}
600
/*
 * Emulator callback: perform a port I/O access on behalf of the
 * instruction emulator by forwarding it to QEMU's I/O address space.
 */
static HRESULT CALLBACK whpx_emu_ioport_callback(
    void *ctx,
    WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
{
    MemTxAttrs attrs = { 0 };
    address_space_rw(&address_space_io, IoAccess->Port, attrs,
                     &IoAccess->Data, IoAccess->AccessSize,
                     IoAccess->Direction);
    return S_OK;
}
611
612 static HRESULT CALLBACK whpx_emu_mmio_callback(
613 void *ctx,
614 WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
615 {
616 cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
617 ma->Direction);
618 return S_OK;
619 }
620
/*
 * Emulator callback: read vCPU registers for the instruction emulator.
 * @ctx is the CPUState passed to WHvEmulatorTry*Emulation.
 */
static HRESULT CALLBACK whpx_emu_getreg_callback(
    void *ctx,
    const WHV_REGISTER_NAME *RegisterNames,
    UINT32 RegisterCount,
    WHV_REGISTER_VALUE *RegisterValues)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    CPUState *cpu = (CPUState *)ctx;

    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        RegisterNames, RegisterCount,
        RegisterValues);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to get virtual processor registers,"
                     " hr=%08lx", hr);
    }

    return hr;
}
642
/*
 * Emulator callback: write vCPU registers on behalf of the instruction
 * emulator. @ctx is the CPUState passed to WHvEmulatorTry*Emulation.
 */
static HRESULT CALLBACK whpx_emu_setreg_callback(
    void *ctx,
    const WHV_REGISTER_NAME *RegisterNames,
    UINT32 RegisterCount,
    const WHV_REGISTER_VALUE *RegisterValues)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    CPUState *cpu = (CPUState *)ctx;

    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
        whpx->partition, cpu->cpu_index,
        RegisterNames, RegisterCount,
        RegisterValues);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to set virtual processor registers,"
                     " hr=%08lx", hr);
    }

    /*
     * The emulator just successfully wrote the register state. We clear the
     * dirty state so we avoid the double write on resume of the VP.
     */
    cpu->vcpu_dirty = false;

    return hr;
}
670
/*
 * Emulator callback: translate a guest virtual address to a guest
 * physical address via the hypervisor. On success, the result code from
 * the translation is reported back through *TranslationResult.
 */
static HRESULT CALLBACK whpx_emu_translate_callback(
    void *ctx,
    WHV_GUEST_VIRTUAL_ADDRESS Gva,
    WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
    WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
    WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    CPUState *cpu = (CPUState *)ctx;
    WHV_TRANSLATE_GVA_RESULT res;

    hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
                                      Gva, TranslateFlags, &res, Gpa);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
    } else {
        *TranslationResult = res.ResultCode;
    }

    return hr;
}
693
/* Callback table handed to WHvEmulatorCreateEmulator for MMIO/PIO exits. */
static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
    .Size = sizeof(WHV_EMULATOR_CALLBACKS),
    .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
    .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
    .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
    .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
    .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
};
702
/*
 * Handle a memory-access VM exit by running the faulting instruction
 * through the WHPX instruction emulator (which calls back into the
 * whpx_emu_* callbacks above).
 *
 * Returns 0 on success, -1 if parsing or emulation failed.
 */
static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
{
    HRESULT hr;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    WHV_EMULATOR_STATUS emu_status;

    hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
        vcpu->emulator, cpu,
        &vcpu->exit_ctx.VpContext, ctx,
        &emu_status);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
        return -1;
    }

    if (!emu_status.EmulationSuccessful) {
        error_report("WHPX: Failed to emulate MMIO access with"
                     " EmulatorReturnStatus: %u", emu_status.AsUINT32);
        return -1;
    }

    return 0;
}
726
/*
 * Handle a port-I/O VM exit via the WHPX instruction emulator, the PIO
 * counterpart of whpx_handle_mmio().
 *
 * Returns 0 on success, -1 if parsing or emulation failed.
 */
static int whpx_handle_portio(CPUState *cpu,
                              WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx)
{
    HRESULT hr;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    WHV_EMULATOR_STATUS emu_status;

    hr = whp_dispatch.WHvEmulatorTryIoEmulation(
        vcpu->emulator, cpu,
        &vcpu->exit_ctx.VpContext, ctx,
        &emu_status);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
        return -1;
    }

    if (!emu_status.EmulationSuccessful) {
        error_report("WHPX: Failed to emulate PortIO access with"
                     " EmulatorReturnStatus: %u", emu_status.AsUINT32);
        return -1;
    }

    return 0;
}
751
752 static int whpx_handle_halt(CPUState *cpu)
753 {
754 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
755 int ret = 0;
756
757 qemu_mutex_lock_iothread();
758 if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
759 (env->eflags & IF_MASK)) &&
760 !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
761 cpu->exception_index = EXCP_HLT;
762 cpu->halted = true;
763 ret = 1;
764 }
765 qemu_mutex_unlock_iothread();
766
767 return ret;
768 }
769
/*
 * Prepare the vCPU before (re)entering the hypervisor: inject pending
 * NMIs and external interrupts, sync the TPR to CR8, and request an
 * interrupt-window exit when an interrupt is pending but cannot be
 * delivered yet. All interrupt_request manipulation happens under the
 * iothread lock; the accumulated register writes are committed in one
 * WHvSetVirtualProcessorRegisters call after the lock is dropped.
 */
static void whpx_vcpu_pre_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    int irq;
    uint8_t tpr;
    WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
    UINT32 reg_count = 0;
    /* At most 3 registers can be queued below (interruption, CR8, window) */
    WHV_REGISTER_VALUE reg_values[3];
    WHV_REGISTER_NAME reg_names[3];

    memset(&new_int, 0, sizeof(new_int));
    memset(reg_values, 0, sizeof(reg_values));

    qemu_mutex_lock_iothread();

    /* Inject NMI */
    if (!vcpu->interruption_pending &&
        cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
        if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
            vcpu->interruptable = false;
            new_int.InterruptionType = WHvX64PendingNmi;
            new_int.InterruptionPending = 1;
            new_int.InterruptionVector = 2;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
            /* SMI is not supported; the request is simply discarded */
            cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
        }
    }

    /*
     * Force the VCPU out of its inner loop to process any INIT requests or
     * commit pending TPR access.
     */
    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
        if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
            !(env->hflags & HF_SMM_MASK)) {
            cpu->exit_request = 1;
        }
        if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
            cpu->exit_request = 1;
        }
    }

    /* Get pending hard interruption or replay one that was overwritten */
    if (!vcpu->interruption_pending &&
        vcpu->interruptable && (env->eflags & IF_MASK)) {
        assert(!new_int.InterruptionPending);
        if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
            irq = cpu_get_pic_interrupt(env);
            if (irq >= 0) {
                new_int.InterruptionType = WHvX64PendingInterrupt;
                new_int.InterruptionPending = 1;
                new_int.InterruptionVector = irq;
            }
        }
    }

    /* Setup interrupt state if new one was prepared */
    if (new_int.InterruptionPending) {
        reg_values[reg_count].PendingInterruption = new_int;
        reg_names[reg_count] = WHvRegisterPendingInterruption;
        reg_count += 1;
    }

    /* Sync the TPR to the CR8 if was modified during the intercept */
    tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
    if (tpr != vcpu->tpr) {
        vcpu->tpr = tpr;
        reg_values[reg_count].Reg64 = tpr;
        cpu->exit_request = 1;
        reg_names[reg_count] = WHvX64RegisterCr8;
        reg_count += 1;
    }

    /* Update the state of the interrupt delivery notification */
    if (!vcpu->window_registered &&
        cpu->interrupt_request & CPU_INTERRUPT_HARD) {
        reg_values[reg_count].DeliverabilityNotifications.InterruptNotification
            = 1;
        vcpu->window_registered = 1;
        reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
        reg_count += 1;
    }

    qemu_mutex_unlock_iothread();

    if (reg_count) {
        hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
            whpx->partition, cpu->cpu_index,
            reg_names, reg_count, reg_values);
        if (FAILED(hr)) {
            error_report("WHPX: Failed to set interrupt state registers,"
                         " hr=%08lx", hr);
        }
    }

    return;
}
874
/*
 * Sync back the lightweight state the hypervisor reports in the exit
 * context after a run: RFLAGS, CR8/TPR (pushed to the APIC model under
 * the iothread lock if it changed), and the interruption/interrupt-shadow
 * flags consumed by whpx_vcpu_pre_run().
 */
static void whpx_vcpu_post_run(CPUState *cpu)
{
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);

    env->eflags = vcpu->exit_ctx.VpContext.Rflags;

    uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8;
    if (vcpu->tpr != tpr) {
        vcpu->tpr = tpr;
        qemu_mutex_lock_iothread();
        cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr);
        qemu_mutex_unlock_iothread();
    }

    vcpu->interruption_pending =
        vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;

    /* An interrupt shadow blocks injection until the next instruction */
    vcpu->interruptable =
        !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;

    return;
}
899
/*
 * Process asynchronous CPU events (INIT, POLL, SIPI, TPR reports) before
 * entering the run loop. INIT/SIPI mark the register state dirty or
 * re-fetch it so the subsequent do_cpu_init()/do_cpu_sipi() operate on
 * current state; a deliverable IRQ or NMI clears the halted flag.
 */
static void whpx_vcpu_process_async_events(CPUState *cpu)
{
    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);

    if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
        !(env->hflags & HF_SMM_MASK)) {

        do_cpu_init(x86_cpu);
        /* INIT resets the register file; push it on next run */
        cpu->vcpu_dirty = true;
        vcpu->interruptable = true;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* A deliverable maskable IRQ or an NMI wakes a halted vCPU */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = false;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        if (!cpu->vcpu_dirty) {
            whpx_get_registers(cpu);
        }
        do_cpu_sipi(x86_cpu);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
        if (!cpu->vcpu_dirty) {
            whpx_get_registers(cpu);
        }
        apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
                                      env->tpr_access_type);
    }

    return;
}
943
/*
 * Main vCPU execution loop: flush dirty register state, prepare
 * interrupt injection, run the virtual processor, and dispatch on the
 * exit reason. Loops while handlers return 0; returns non-zero to the
 * caller only on hard failure (ret < 0).
 *
 * MSR and CPUID exits are handled inline: unsupported MSR writes are
 * ignored and reads return 0; CPUID requests are satisfied from QEMU's
 * own CPUID tables since WHPX currently cannot take CPUID values after
 * partition setup.
 */
static int whpx_vcpu_run(CPUState *cpu)
{
    HRESULT hr;
    struct whpx_state *whpx = &whpx_global;
    struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
    int ret;

    whpx_vcpu_process_async_events(cpu);
    if (cpu->halted) {
        /* Nothing to run until an interrupt arrives */
        cpu->exception_index = EXCP_HLT;
        atomic_set(&cpu->exit_request, false);
        return 0;
    }

    /* Drop the iothread lock while the guest executes */
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);

    do {
        if (cpu->vcpu_dirty) {
            whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
            cpu->vcpu_dirty = false;
        }

        whpx_vcpu_pre_run(cpu);

        if (atomic_read(&cpu->exit_request)) {
            whpx_vcpu_kick(cpu);
        }

        hr = whp_dispatch.WHvRunVirtualProcessor(
            whpx->partition, cpu->cpu_index,
            &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));

        if (FAILED(hr)) {
            error_report("WHPX: Failed to exec a virtual processor,"
                         " hr=%08lx", hr);
            ret = -1;
            break;
        }

        whpx_vcpu_post_run(cpu);

        switch (vcpu->exit_ctx.ExitReason) {
        case WHvRunVpExitReasonMemoryAccess:
            ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
            break;

        case WHvRunVpExitReasonX64IoPortAccess:
            ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
            break;

        case WHvRunVpExitReasonX64InterruptWindow:
            /* Window opened; pre_run may now inject the pending IRQ */
            vcpu->window_registered = 0;
            ret = 0;
            break;

        case WHvRunVpExitReasonX64Halt:
            ret = whpx_handle_halt(cpu);
            break;

        case WHvRunVpExitReasonCanceled:
            /* Run was cancelled (kick); bounce out to the main loop */
            cpu->exception_index = EXCP_INTERRUPT;
            ret = 1;
            break;

        case WHvRunVpExitReasonX64MsrAccess: {
            WHV_REGISTER_VALUE reg_values[3] = {0};
            WHV_REGISTER_NAME reg_names[3];
            UINT32 reg_count;

            reg_names[0] = WHvX64RegisterRip;
            reg_names[1] = WHvX64RegisterRax;
            reg_names[2] = WHvX64RegisterRdx;

            /* Advance RIP past the RDMSR/WRMSR instruction */
            reg_values[0].Reg64 =
                vcpu->exit_ctx.VpContext.Rip +
                vcpu->exit_ctx.VpContext.InstructionLength;

            /*
             * For all unsupported MSR access we:
             *     ignore writes
             *     return 0 on read.
             */
            reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
                        1 : 3;

            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
                whpx->partition,
                cpu->cpu_index,
                reg_names, reg_count,
                reg_values);

            if (FAILED(hr)) {
                error_report("WHPX: Failed to set MsrAccess state "
                             " registers, hr=%08lx", hr);
            }
            ret = 0;
            break;
        }
        case WHvRunVpExitReasonX64Cpuid: {
            WHV_REGISTER_VALUE reg_values[5];
            WHV_REGISTER_NAME reg_names[5];
            UINT32 reg_count = 5;
            UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;

            memset(reg_values, 0, sizeof(reg_values));

            /* Advance RIP past the CPUID instruction */
            rip = vcpu->exit_ctx.VpContext.Rip +
                  vcpu->exit_ctx.VpContext.InstructionLength;
            cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;

            /*
             * Ideally, these should be supplied to the hypervisor during VCPU
             * initialization and it should be able to satisfy this request.
             * But, currently, WHPX doesn't support setting CPUID values in the
             * hypervisor once the partition has been setup, which is too late
             * since VCPUs are realized later. For now, use the values from
             * QEMU to satisfy these requests, until WHPX adds support for
             * being able to set these values in the hypervisor at runtime.
             */
            cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx,
                (UINT32 *)&rcx, (UINT32 *)&rdx);
            switch (cpuid_fn) {
            case 0x80000001:
                /* Remove any support of OSVW */
                rcx &= ~CPUID_EXT3_OSVW;
                break;
            }

            reg_names[0] = WHvX64RegisterRip;
            reg_names[1] = WHvX64RegisterRax;
            reg_names[2] = WHvX64RegisterRcx;
            reg_names[3] = WHvX64RegisterRdx;
            reg_names[4] = WHvX64RegisterRbx;

            reg_values[0].Reg64 = rip;
            reg_values[1].Reg64 = rax;
            reg_values[2].Reg64 = rcx;
            reg_values[3].Reg64 = rdx;
            reg_values[4].Reg64 = rbx;

            hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
                whpx->partition, cpu->cpu_index,
                reg_names,
                reg_count,
                reg_values);

            if (FAILED(hr)) {
                error_report("WHPX: Failed to set CpuidAccess state registers,"
                             " hr=%08lx", hr);
            }
            ret = 0;
            break;
        }
        case WHvRunVpExitReasonNone:
        case WHvRunVpExitReasonUnrecoverableException:
        case WHvRunVpExitReasonInvalidVpRegisterValue:
        case WHvRunVpExitReasonUnsupportedFeature:
        case WHvRunVpExitReasonException:
        default:
            /* Unrecoverable; report guest panic with current state */
            error_report("WHPX: Unexpected VP exit code %d",
                         vcpu->exit_ctx.ExitReason);
            whpx_get_registers(cpu);
            qemu_mutex_lock_iothread();
            qemu_system_guest_panicked(cpu_get_crash_info(cpu));
            qemu_mutex_unlock_iothread();
            break;
        }

    } while (!ret);

    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
    current_cpu = cpu;

    atomic_set(&cpu->exit_request, false);

    return ret < 0;
}
1125
/*
 * Run on the vcpu thread: pull the register state out of the hypervisor
 * into env, and mark the QEMU copy authoritative until the next vcpu entry.
 */
static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    whpx_get_registers(cpu);
    cpu->vcpu_dirty = true;
}
1131
/*
 * Run on the vcpu thread after a reset: push QEMU's reset register state
 * into the hypervisor and clear the dirty flag.
 */
static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
                                               run_on_cpu_data arg)
{
    whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
    cpu->vcpu_dirty = false;
}
1138
/*
 * Run on the vcpu thread after init (or loadvm): push the full QEMU
 * register state into the hypervisor and clear the dirty flag.
 */
static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
    cpu->vcpu_dirty = false;
}
1145
/*
 * Run on the vcpu thread before loadvm: mark the QEMU copy dirty so the
 * incoming state is uploaded to the hypervisor on the next entry.
 */
static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
                                               run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}
1151
1152 /*
1153 * CPU support.
1154 */
1155
/*
 * Ensure env reflects the current hypervisor register state; a no-op when
 * the QEMU copy is already authoritative (vcpu_dirty set).
 */
void whpx_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}
1162
/* Push QEMU's post-reset register state to the hypervisor (vcpu thread). */
void whpx_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
1167
/* Push QEMU's full register state to the hypervisor (vcpu thread). */
void whpx_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
1172
/* Mark vcpu state dirty ahead of loadvm so it gets re-uploaded afterward. */
void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
1177
1178 /*
1179 * Vcpu support.
1180 */
1181
1182 static Error *whpx_migration_blocker;
1183
1184 static void whpx_cpu_update_state(void *opaque, int running, RunState state)
1185 {
1186 CPUX86State *env = opaque;
1187
1188 if (running) {
1189 env->tsc_valid = false;
1190 }
1191 }
1192
1193 int whpx_init_vcpu(CPUState *cpu)
1194 {
1195 HRESULT hr;
1196 struct whpx_state *whpx = &whpx_global;
1197 struct whpx_vcpu *vcpu;
1198 Error *local_error = NULL;
1199
1200 /* Add migration blockers for all unsupported features of the
1201 * Windows Hypervisor Platform
1202 */
1203 if (whpx_migration_blocker == NULL) {
1204 error_setg(&whpx_migration_blocker,
1205 "State blocked due to non-migratable CPUID feature support,"
1206 "dirty memory tracking support, and XSAVE/XRSTOR support");
1207
1208 (void)migrate_add_blocker(whpx_migration_blocker, &local_error);
1209 if (local_error) {
1210 error_report_err(local_error);
1211 migrate_del_blocker(whpx_migration_blocker);
1212 error_free(whpx_migration_blocker);
1213 return -EINVAL;
1214 }
1215 }
1216
1217 vcpu = g_malloc0(sizeof(struct whpx_vcpu));
1218
1219 if (!vcpu) {
1220 error_report("WHPX: Failed to allocte VCPU context.");
1221 return -ENOMEM;
1222 }
1223
1224 hr = whp_dispatch.WHvEmulatorCreateEmulator(
1225 &whpx_emu_callbacks,
1226 &vcpu->emulator);
1227 if (FAILED(hr)) {
1228 error_report("WHPX: Failed to setup instruction completion support,"
1229 " hr=%08lx", hr);
1230 g_free(vcpu);
1231 return -EINVAL;
1232 }
1233
1234 hr = whp_dispatch.WHvCreateVirtualProcessor(
1235 whpx->partition, cpu->cpu_index, 0);
1236 if (FAILED(hr)) {
1237 error_report("WHPX: Failed to create a virtual processor,"
1238 " hr=%08lx", hr);
1239 whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
1240 g_free(vcpu);
1241 return -EINVAL;
1242 }
1243
1244 vcpu->interruptable = true;
1245
1246 cpu->vcpu_dirty = true;
1247 cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
1248 qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
1249
1250 return 0;
1251 }
1252
1253 int whpx_vcpu_exec(CPUState *cpu)
1254 {
1255 int ret;
1256 int fatal;
1257
1258 for (;;) {
1259 if (cpu->exception_index >= EXCP_INTERRUPT) {
1260 ret = cpu->exception_index;
1261 cpu->exception_index = -1;
1262 break;
1263 }
1264
1265 fatal = whpx_vcpu_run(cpu);
1266
1267 if (fatal) {
1268 error_report("WHPX: Failed to exec a virtual processor");
1269 abort();
1270 }
1271 }
1272
1273 return ret;
1274 }
1275
1276 void whpx_destroy_vcpu(CPUState *cpu)
1277 {
1278 struct whpx_state *whpx = &whpx_global;
1279 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
1280
1281 whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
1282 whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
1283 g_free(cpu->hax_vcpu);
1284 return;
1285 }
1286
/* Force the vcpu out of WHvRunVirtualProcessor() so it re-checks state. */
void whpx_vcpu_kick(CPUState *cpu)
{
    struct whpx_state *whpx = &whpx_global;
    whp_dispatch.WHvCancelRunVirtualProcessor(
        whpx->partition, cpu->cpu_index, 0);
}
1293
1294 /*
1295 * Memory support.
1296 */
1297
1298 static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
1299 void *host_va, int add, int rom,
1300 const char *name)
1301 {
1302 struct whpx_state *whpx = &whpx_global;
1303 HRESULT hr;
1304
1305 /*
1306 if (add) {
1307 printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n",
1308 (void*)start_pa, (void*)size, host_va,
1309 (rom ? "ROM" : "RAM"), name);
1310 } else {
1311 printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n",
1312 (void*)start_pa, (void*)size, host_va, name);
1313 }
1314 */
1315
1316 if (add) {
1317 hr = whp_dispatch.WHvMapGpaRange(whpx->partition,
1318 host_va,
1319 start_pa,
1320 size,
1321 (WHvMapGpaRangeFlagRead |
1322 WHvMapGpaRangeFlagExecute |
1323 (rom ? 0 : WHvMapGpaRangeFlagWrite)));
1324 } else {
1325 hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition,
1326 start_pa,
1327 size);
1328 }
1329
1330 if (FAILED(hr)) {
1331 error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
1332 " Host:%p, hr=%08lx",
1333 (add ? "MAP" : "UNMAP"), name,
1334 (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
1335 }
1336 }
1337
/*
 * Map or unmap (per @add) the RAM part of a memory section, first trimming
 * it to host-page alignment since only whole host pages can be mapped.
 * Non-RAM sections and sections too small to contain a full page are
 * silently skipped.
 */
static void whpx_process_section(MemoryRegionSection *section, int add)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uint64_t host_va;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    /* Distance from start_pa up to the next host-page boundary (0 if
     * already aligned, thanks to the mask on the second line). */
    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
    delta &= ~qemu_real_host_page_mask;
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    /* Round the remaining size down to a whole number of host pages. */
    size &= qemu_real_host_page_mask;
    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
        return;
    }

    /* Host virtual address of the (trimmed) start of the section. */
    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
            + section->offset_within_region + delta;

    whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
                        memory_region_is_rom(mr), mr->name);
}
1368
/* MemoryListener hook: reference the new section's region and map it. */
static void whpx_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    whpx_process_section(section, 1);
}
1375
/* MemoryListener hook: unmap the section, then drop its region reference. */
static void whpx_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    whpx_process_section(section, 0);
    memory_region_unref(section->mr);
}
1382
/* MemoryListener hook: WHPX needs no per-transaction bracketing. */
static void whpx_transaction_begin(MemoryListener *listener)
{
}
1386
/* MemoryListener hook: WHPX needs no per-transaction bracketing. */
static void whpx_transaction_commit(MemoryListener *listener)
{
}
1390
/*
 * MemoryListener hook: WHPX provides no dirty-page tracking (one of the
 * features listed in the migration blocker), so conservatively mark the
 * entire RAM section dirty on every sync.
 */
static void whpx_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!memory_region_is_ram(mr)) {
        return;
    }

    memory_region_set_dirty(mr, 0, int128_get64(section->size));
}
1402
/* Listener that mirrors the system memory map into the WHPX partition. */
static MemoryListener whpx_memory_listener = {
    .begin = whpx_transaction_begin,
    .commit = whpx_transaction_commit,
    .region_add = whpx_region_add,
    .region_del = whpx_region_del,
    .log_sync = whpx_log_sync,
    .priority = 10,
};
1411
/* Register the WHPX memory listener on the system address space. */
static void whpx_memory_init(void)
{
    memory_listener_register(&whpx_memory_listener, &address_space_memory);
}
1416
/*
 * cpu_interrupt handler: record the pending interrupt mask and kick the
 * vcpu thread when the request originates from another thread.
 */
static void whpx_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;

    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}
1425
1426 /*
1427 * Load the functions from the given library, using the given handle. If a
1428 * handle is provided, it is used, otherwise the library is opened. The
1429 * handle will be updated on return with the opened one.
1430 */
static bool load_whp_dispatch_fns(HMODULE *handle,
    WHPFunctionList function_list)
{
    HMODULE hLib = *handle;

#define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
#define WINHV_EMULATION_DLL "WinHvEmulation.dll"
/* Optional symbol: leave the dispatch slot NULL when the lookup fails. */
#define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
    whp_dispatch.function_name = \
        (function_name ## _t)GetProcAddress(hLib, #function_name); \

/* Mandatory symbol: a failed lookup aborts the whole load. */
#define WHP_LOAD_FIELD(return_type, function_name, signature) \
    whp_dispatch.function_name = \
        (function_name ## _t)GetProcAddress(hLib, #function_name); \
    if (!whp_dispatch.function_name) { \
        error_report("Could not load function %s", #function_name); \
        goto error; \
    } \

/* Open the library only if the caller has not already provided a handle. */
#define WHP_LOAD_LIB(lib_name, handle_lib) \
    if (!handle_lib) { \
        handle_lib = LoadLibrary(lib_name); \
        if (!handle_lib) { \
            error_report("Could not load library %s.", lib_name); \
            goto error; \
        } \
    } \

    switch (function_list) {
    case WINHV_PLATFORM_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_EMULATION_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
        LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
        break;
    }

    /* Publish the (possibly newly opened) handle only on success. */
    *handle = hLib;
    return true;

error:
    /*
     * NOTE(review): if the caller passed in an already-open *handle, this
     * FreeLibrary() drops a reference the caller still holds while leaving
     * *handle unchanged, so a later FreeLibrary() on it would over-release.
     * In practice only the supplemental list (which cannot reach this path)
     * is called with an open handle -- confirm before adding new callers.
     */
    if (hLib) {
        FreeLibrary(hLib);
    }

    return false;
}
1486
1487 /*
1488 * Partition support
1489 */
1490
1491 static int whpx_accel_init(MachineState *ms)
1492 {
1493 struct whpx_state *whpx;
1494 int ret;
1495 HRESULT hr;
1496 WHV_CAPABILITY whpx_cap;
1497 UINT32 whpx_cap_size;
1498 WHV_PARTITION_PROPERTY prop;
1499
1500 whpx = &whpx_global;
1501
1502 if (!init_whp_dispatch()) {
1503 ret = -ENOSYS;
1504 goto error;
1505 }
1506
1507 memset(whpx, 0, sizeof(struct whpx_state));
1508 whpx->mem_quota = ms->ram_size;
1509
1510 hr = whp_dispatch.WHvGetCapability(
1511 WHvCapabilityCodeHypervisorPresent, &whpx_cap,
1512 sizeof(whpx_cap), &whpx_cap_size);
1513 if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
1514 error_report("WHPX: No accelerator found, hr=%08lx", hr);
1515 ret = -ENOSPC;
1516 goto error;
1517 }
1518
1519 hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
1520 if (FAILED(hr)) {
1521 error_report("WHPX: Failed to create partition, hr=%08lx", hr);
1522 ret = -EINVAL;
1523 goto error;
1524 }
1525
1526 memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
1527 prop.ProcessorCount = ms->smp.cpus;
1528 hr = whp_dispatch.WHvSetPartitionProperty(
1529 whpx->partition,
1530 WHvPartitionPropertyCodeProcessorCount,
1531 &prop,
1532 sizeof(WHV_PARTITION_PROPERTY));
1533
1534 if (FAILED(hr)) {
1535 error_report("WHPX: Failed to set partition core count to %d,"
1536 " hr=%08lx", ms->smp.cores, hr);
1537 ret = -EINVAL;
1538 goto error;
1539 }
1540
1541 memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
1542 prop.ExtendedVmExits.X64MsrExit = 1;
1543 prop.ExtendedVmExits.X64CpuidExit = 1;
1544 hr = whp_dispatch.WHvSetPartitionProperty(
1545 whpx->partition,
1546 WHvPartitionPropertyCodeExtendedVmExits,
1547 &prop,
1548 sizeof(WHV_PARTITION_PROPERTY));
1549
1550 if (FAILED(hr)) {
1551 error_report("WHPX: Failed to enable partition extended X64MsrExit and"
1552 " X64CpuidExit hr=%08lx", hr);
1553 ret = -EINVAL;
1554 goto error;
1555 }
1556
1557 UINT32 cpuidExitList[] = {1, 0x80000001};
1558 hr = whp_dispatch.WHvSetPartitionProperty(
1559 whpx->partition,
1560 WHvPartitionPropertyCodeCpuidExitList,
1561 cpuidExitList,
1562 RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
1563
1564 if (FAILED(hr)) {
1565 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
1566 hr);
1567 ret = -EINVAL;
1568 goto error;
1569 }
1570
1571 hr = whp_dispatch.WHvSetupPartition(whpx->partition);
1572 if (FAILED(hr)) {
1573 error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
1574 ret = -EINVAL;
1575 goto error;
1576 }
1577
1578 whpx_memory_init();
1579
1580 cpu_interrupt_handler = whpx_handle_interrupt;
1581
1582 printf("Windows Hypervisor Platform accelerator is operational\n");
1583 return 0;
1584
1585 error:
1586
1587 if (NULL != whpx->partition) {
1588 whp_dispatch.WHvDeletePartition(whpx->partition);
1589 whpx->partition = NULL;
1590 }
1591
1592
1593 return ret;
1594 }
1595
/* Return non-zero when QEMU is running with the WHPX accelerator. */
int whpx_enabled(void)
{
    return whpx_allowed;
}
1600
/* QOM class init: wire the WHPX accelerator name, init hook and flag. */
static void whpx_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "WHPX";
    ac->init_machine = whpx_accel_init;
    ac->allowed = &whpx_allowed;
}
1608
/* QOM type descriptor for the "whpx" accelerator. */
static const TypeInfo whpx_accel_type = {
    .name = ACCEL_CLASS_NAME("whpx"),
    .parent = TYPE_ACCEL,
    .class_init = whpx_accel_class_init,
};
1614
/* Register the WHPX accelerator type with the QOM type system. */
static void whpx_type_init(void)
{
    type_register_static(&whpx_accel_type);
}
1619
1620 bool init_whp_dispatch(void)
1621 {
1622 if (whp_dispatch_initialized) {
1623 return true;
1624 }
1625
1626 if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
1627 goto error;
1628 }
1629
1630 if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
1631 goto error;
1632 }
1633
1634 assert(load_whp_dispatch_fns(&hWinHvPlatform,
1635 WINHV_PLATFORM_FNS_SUPPLEMENTAL));
1636 whp_dispatch_initialized = true;
1637
1638 return true;
1639 error:
1640 if (hWinHvPlatform) {
1641 FreeLibrary(hWinHvPlatform);
1642 }
1643
1644 if (hWinHvEmulation) {
1645 FreeLibrary(hWinHvEmulation);
1646 }
1647
1648 return false;
1649 }
1650
/* Register the WHPX accelerator QOM type at program startup. */
type_init(whpx_type_init);