migration: increase max-bandwidth to 128 MiB/s (1 Gib/s)
[qemu.git] / target / s390x / mmu_helper.c
1 /*
2 * S390x MMU related functions
3 *
4 * Copyright (c) 2011 Alexander Graf
5 * Copyright (c) 2015 Thomas Huth, IBM Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 */
17
18 #include "qemu/osdep.h"
19 #include "qemu/error-report.h"
20 #include "exec/address-spaces.h"
21 #include "cpu.h"
22 #include "internal.h"
23 #include "kvm_s390x.h"
24 #include "sysemu/kvm.h"
25 #include "sysemu/tcg.h"
26 #include "exec/exec-all.h"
27 #include "trace.h"
28 #include "hw/hw.h"
29 #include "hw/s390x/storage-keys.h"
30
/*
 * Fetch/store indication bits that get OR-ed into the translation-exception
 * code: FS_WRITE marks a store access, FS_READ any other access.
 */
#define FS_READ  0x800
#define FS_WRITE 0x400
34
35 static void trigger_access_exception(CPUS390XState *env, uint32_t type,
36 uint64_t tec)
37 {
38 S390CPU *cpu = env_archcpu(env);
39
40 if (kvm_enabled()) {
41 kvm_s390_access_exception(cpu, type, tec);
42 } else {
43 CPUState *cs = env_cpu(env);
44 if (type != PGM_ADDRESSING) {
45 stq_phys(cs->as, env->psa + offsetof(LowCore, trans_exc_code), tec);
46 }
47 trigger_pgm_exception(env, type);
48 }
49 }
50
/*
 * Tell whether @addr falls into one of the two ranges that Low-Address
 * Protection covers: bytes 0..511 and bytes 4096..4607.
 */
static bool is_low_address(uint64_t addr)
{
    if (addr < 512) {
        return true;
    }
    return addr >= 4096 && addr < 4608;
}
56
57 /* check whether Low-Address Protection is enabled for mmu_translate() */
58 static bool lowprot_enabled(const CPUS390XState *env, uint64_t asc)
59 {
60 if (!(env->cregs[0] & CR0_LOWPROT)) {
61 return false;
62 }
63 if (!(env->psw.mask & PSW_MASK_DAT)) {
64 return true;
65 }
66
67 /* Check the private-space control bit */
68 switch (asc) {
69 case PSW_ASC_PRIMARY:
70 return !(env->cregs[1] & ASCE_PRIVATE_SPACE);
71 case PSW_ASC_SECONDARY:
72 return !(env->cregs[7] & ASCE_PRIVATE_SPACE);
73 case PSW_ASC_HOME:
74 return !(env->cregs[13] & ASCE_PRIVATE_SPACE);
75 default:
76 /* We don't support access register mode */
77 error_report("unsupported addressing mode");
78 exit(1);
79 }
80 }
81
82 /**
83 * Translate real address to absolute (= physical)
84 * address by taking care of the prefix mapping.
85 */
86 target_ulong mmu_real2abs(CPUS390XState *env, target_ulong raddr)
87 {
88 if (raddr < 0x2000) {
89 return raddr + env->psa; /* Map the lowcore. */
90 } else if (raddr >= env->psa && raddr < env->psa + 0x2000) {
91 return raddr - env->psa; /* Map the 0 page. */
92 }
93 return raddr;
94 }
95
96 static inline bool read_table_entry(CPUS390XState *env, hwaddr gaddr,
97 uint64_t *entry)
98 {
99 CPUState *cs = env_cpu(env);
100
101 /*
102 * According to the PoP, these table addresses are "unpredictably real
103 * or absolute". Also, "it is unpredictable whether the address wraps
104 * or an addressing exception is recognized".
105 *
106 * We treat them as absolute addresses and don't wrap them.
107 */
108 if (unlikely(address_space_read(cs->as, gaddr, MEMTXATTRS_UNSPECIFIED,
109 entry, sizeof(*entry)) !=
110 MEMTX_OK)) {
111 return false;
112 }
113 *entry = be64_to_cpu(*entry);
114 return true;
115 }
116
117 static int mmu_translate_asce(CPUS390XState *env, target_ulong vaddr,
118 uint64_t asc, uint64_t asce, target_ulong *raddr,
119 int *flags, int rw)
120 {
121 const bool edat1 = (env->cregs[0] & CR0_EDAT) &&
122 s390_has_feat(S390_FEAT_EDAT);
123 const bool edat2 = edat1 && s390_has_feat(S390_FEAT_EDAT_2);
124 const bool iep = (env->cregs[0] & CR0_IEP) &&
125 s390_has_feat(S390_FEAT_INSTRUCTION_EXEC_PROT);
126 const int asce_tl = asce & ASCE_TABLE_LENGTH;
127 const int asce_p = asce & ASCE_PRIVATE_SPACE;
128 hwaddr gaddr = asce & ASCE_ORIGIN;
129 uint64_t entry;
130
131 if (asce & ASCE_REAL_SPACE) {
132 /* direct mapping */
133 *raddr = vaddr;
134 return 0;
135 }
136
137 switch (asce & ASCE_TYPE_MASK) {
138 case ASCE_TYPE_REGION1:
139 if (VADDR_REGION1_TL(vaddr) > asce_tl) {
140 return PGM_REG_FIRST_TRANS;
141 }
142 gaddr += VADDR_REGION1_TX(vaddr) * 8;
143 break;
144 case ASCE_TYPE_REGION2:
145 if (VADDR_REGION1_TX(vaddr)) {
146 return PGM_ASCE_TYPE;
147 }
148 if (VADDR_REGION2_TL(vaddr) > asce_tl) {
149 return PGM_REG_SEC_TRANS;
150 }
151 gaddr += VADDR_REGION2_TX(vaddr) * 8;
152 break;
153 case ASCE_TYPE_REGION3:
154 if (VADDR_REGION1_TX(vaddr) || VADDR_REGION2_TX(vaddr)) {
155 return PGM_ASCE_TYPE;
156 }
157 if (VADDR_REGION3_TL(vaddr) > asce_tl) {
158 return PGM_REG_THIRD_TRANS;
159 }
160 gaddr += VADDR_REGION3_TX(vaddr) * 8;
161 break;
162 case ASCE_TYPE_SEGMENT:
163 if (VADDR_REGION1_TX(vaddr) || VADDR_REGION2_TX(vaddr) ||
164 VADDR_REGION3_TX(vaddr)) {
165 return PGM_ASCE_TYPE;
166 }
167 if (VADDR_SEGMENT_TL(vaddr) > asce_tl) {
168 return PGM_SEGMENT_TRANS;
169 }
170 gaddr += VADDR_SEGMENT_TX(vaddr) * 8;
171 break;
172 }
173
174 switch (asce & ASCE_TYPE_MASK) {
175 case ASCE_TYPE_REGION1:
176 if (!read_table_entry(env, gaddr, &entry)) {
177 return PGM_ADDRESSING;
178 }
179 if (entry & REGION_ENTRY_I) {
180 return PGM_REG_FIRST_TRANS;
181 }
182 if ((entry & REGION_ENTRY_TT) != REGION_ENTRY_TT_REGION1) {
183 return PGM_TRANS_SPEC;
184 }
185 if (VADDR_REGION2_TL(vaddr) < (entry & REGION_ENTRY_TF) >> 6 ||
186 VADDR_REGION2_TL(vaddr) > (entry & REGION_ENTRY_TL)) {
187 return PGM_REG_SEC_TRANS;
188 }
189 if (edat1 && (entry & REGION_ENTRY_P)) {
190 *flags &= ~PAGE_WRITE;
191 }
192 gaddr = (entry & REGION_ENTRY_ORIGIN) + VADDR_REGION2_TX(vaddr) * 8;
193 /* fall through */
194 case ASCE_TYPE_REGION2:
195 if (!read_table_entry(env, gaddr, &entry)) {
196 return PGM_ADDRESSING;
197 }
198 if (entry & REGION_ENTRY_I) {
199 return PGM_REG_SEC_TRANS;
200 }
201 if ((entry & REGION_ENTRY_TT) != REGION_ENTRY_TT_REGION2) {
202 return PGM_TRANS_SPEC;
203 }
204 if (VADDR_REGION3_TL(vaddr) < (entry & REGION_ENTRY_TF) >> 6 ||
205 VADDR_REGION3_TL(vaddr) > (entry & REGION_ENTRY_TL)) {
206 return PGM_REG_THIRD_TRANS;
207 }
208 if (edat1 && (entry & REGION_ENTRY_P)) {
209 *flags &= ~PAGE_WRITE;
210 }
211 gaddr = (entry & REGION_ENTRY_ORIGIN) + VADDR_REGION3_TX(vaddr) * 8;
212 /* fall through */
213 case ASCE_TYPE_REGION3:
214 if (!read_table_entry(env, gaddr, &entry)) {
215 return PGM_ADDRESSING;
216 }
217 if (entry & REGION_ENTRY_I) {
218 return PGM_REG_THIRD_TRANS;
219 }
220 if ((entry & REGION_ENTRY_TT) != REGION_ENTRY_TT_REGION3) {
221 return PGM_TRANS_SPEC;
222 }
223 if (edat2 && (entry & REGION3_ENTRY_CR) && asce_p) {
224 return PGM_TRANS_SPEC;
225 }
226 if (edat1 && (entry & REGION_ENTRY_P)) {
227 *flags &= ~PAGE_WRITE;
228 }
229 if (edat2 && (entry & REGION3_ENTRY_FC)) {
230 if (iep && (entry & REGION3_ENTRY_IEP)) {
231 *flags &= ~PAGE_EXEC;
232 }
233 *raddr = (entry & REGION3_ENTRY_RFAA) |
234 (vaddr & ~REGION3_ENTRY_RFAA);
235 return 0;
236 }
237 if (VADDR_SEGMENT_TL(vaddr) < (entry & REGION_ENTRY_TF) >> 6 ||
238 VADDR_SEGMENT_TL(vaddr) > (entry & REGION_ENTRY_TL)) {
239 return PGM_SEGMENT_TRANS;
240 }
241 gaddr = (entry & REGION_ENTRY_ORIGIN) + VADDR_SEGMENT_TX(vaddr) * 8;
242 /* fall through */
243 case ASCE_TYPE_SEGMENT:
244 if (!read_table_entry(env, gaddr, &entry)) {
245 return PGM_ADDRESSING;
246 }
247 if (entry & SEGMENT_ENTRY_I) {
248 return PGM_SEGMENT_TRANS;
249 }
250 if ((entry & SEGMENT_ENTRY_TT) != SEGMENT_ENTRY_TT_SEGMENT) {
251 return PGM_TRANS_SPEC;
252 }
253 if ((entry & SEGMENT_ENTRY_CS) && asce_p) {
254 return PGM_TRANS_SPEC;
255 }
256 if (entry & SEGMENT_ENTRY_P) {
257 *flags &= ~PAGE_WRITE;
258 }
259 if (edat1 && (entry & SEGMENT_ENTRY_FC)) {
260 if (iep && (entry & SEGMENT_ENTRY_IEP)) {
261 *flags &= ~PAGE_EXEC;
262 }
263 *raddr = (entry & SEGMENT_ENTRY_SFAA) |
264 (vaddr & ~SEGMENT_ENTRY_SFAA);
265 return 0;
266 }
267 gaddr = (entry & SEGMENT_ENTRY_ORIGIN) + VADDR_PAGE_TX(vaddr) * 8;
268 break;
269 }
270
271 if (!read_table_entry(env, gaddr, &entry)) {
272 return PGM_ADDRESSING;
273 }
274 if (entry & PAGE_ENTRY_I) {
275 return PGM_PAGE_TRANS;
276 }
277 if (entry & PAGE_ENTRY_0) {
278 return PGM_TRANS_SPEC;
279 }
280 if (entry & PAGE_ENTRY_P) {
281 *flags &= ~PAGE_WRITE;
282 }
283 if (iep && (entry & PAGE_ENTRY_IEP)) {
284 *flags &= ~PAGE_EXEC;
285 }
286
287 *raddr = entry & TARGET_PAGE_MASK;
288 return 0;
289 }
290
291 static void mmu_handle_skey(target_ulong addr, int rw, int *flags)
292 {
293 static S390SKeysClass *skeyclass;
294 static S390SKeysState *ss;
295 uint8_t key;
296 int rc;
297
298 if (unlikely(addr >= ram_size)) {
299 return;
300 }
301
302 if (unlikely(!ss)) {
303 ss = s390_get_skeys_device();
304 skeyclass = S390_SKEYS_GET_CLASS(ss);
305 }
306
307 /*
308 * Whenever we create a new TLB entry, we set the storage key reference
309 * bit. In case we allow write accesses, we set the storage key change
310 * bit. Whenever the guest changes the storage key, we have to flush the
311 * TLBs of all CPUs (the whole TLB or all affected entries), so that the
312 * next reference/change will result in an MMU fault and make us properly
313 * update the storage key here.
314 *
315 * Note 1: "record of references ... is not necessarily accurate",
316 * "change bit may be set in case no storing has occurred".
317 * -> We can set reference/change bits even on exceptions.
318 * Note 2: certain accesses seem to ignore storage keys. For example,
319 * DAT translation does not set reference bits for table accesses.
320 *
321 * TODO: key-controlled protection. Only CPU accesses make use of the
322 * PSW key. CSS accesses are different - we have to pass in the key.
323 *
324 * TODO: we have races between getting and setting the key.
325 */
326 rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
327 if (rc) {
328 trace_get_skeys_nonzero(rc);
329 return;
330 }
331
332 switch (rw) {
333 case MMU_DATA_LOAD:
334 case MMU_INST_FETCH:
335 /*
336 * The TLB entry has to remain write-protected on read-faults if
337 * the storage key does not indicate a change already. Otherwise
338 * we might miss setting the change bit on write accesses.
339 */
340 if (!(key & SK_C)) {
341 *flags &= ~PAGE_WRITE;
342 }
343 break;
344 case MMU_DATA_STORE:
345 key |= SK_C;
346 break;
347 default:
348 g_assert_not_reached();
349 }
350
351 /* Any store/fetch sets the reference bit */
352 key |= SK_R;
353
354 rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
355 if (rc) {
356 trace_set_skeys_nonzero(rc);
357 }
358 }
359
360 /**
361 * Translate a virtual (logical) address into a physical (absolute) address.
362 * @param vaddr the virtual address
363 * @param rw 0 = read, 1 = write, 2 = code fetch
364 * @param asc address space control (one of the PSW_ASC_* modes)
365 * @param raddr the translated address is stored to this pointer
366 * @param flags the PAGE_READ/WRITE/EXEC flags are stored to this pointer
367 * @param exc true = inject a program check if a fault occurred
368 * @return 0 = success, != 0, the exception to raise
369 */
370 int mmu_translate(CPUS390XState *env, target_ulong vaddr, int rw, uint64_t asc,
371 target_ulong *raddr, int *flags, uint64_t *tec)
372 {
373 uint64_t asce;
374 int r;
375
376 *tec = (vaddr & TARGET_PAGE_MASK) | (asc >> 46) |
377 (rw == MMU_DATA_STORE ? FS_WRITE : FS_READ);
378 *flags = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
379
380 if (is_low_address(vaddr & TARGET_PAGE_MASK) && lowprot_enabled(env, asc)) {
381 /*
382 * If any part of this page is currently protected, make sure the
383 * TLB entry will not be reused.
384 *
385 * As the protected range is always the first 512 bytes of the
386 * two first pages, we are able to catch all writes to these areas
387 * just by looking at the start address (triggering the tlb miss).
388 */
389 *flags |= PAGE_WRITE_INV;
390 if (is_low_address(vaddr) && rw == MMU_DATA_STORE) {
391 /* LAP sets bit 56 */
392 *tec |= 0x80;
393 return PGM_PROTECTION;
394 }
395 }
396
397 vaddr &= TARGET_PAGE_MASK;
398
399 if (!(env->psw.mask & PSW_MASK_DAT)) {
400 *raddr = vaddr;
401 goto nodat;
402 }
403
404 switch (asc) {
405 case PSW_ASC_PRIMARY:
406 asce = env->cregs[1];
407 break;
408 case PSW_ASC_HOME:
409 asce = env->cregs[13];
410 break;
411 case PSW_ASC_SECONDARY:
412 asce = env->cregs[7];
413 break;
414 case PSW_ASC_ACCREG:
415 default:
416 hw_error("guest switched to unknown asc mode\n");
417 break;
418 }
419
420 /* perform the DAT translation */
421 r = mmu_translate_asce(env, vaddr, asc, asce, raddr, flags, rw);
422 if (unlikely(r)) {
423 return r;
424 }
425
426 /* check for DAT protection */
427 if (unlikely(rw == MMU_DATA_STORE && !(*flags & PAGE_WRITE))) {
428 /* DAT sets bit 61 only */
429 *tec |= 0x4;
430 return PGM_PROTECTION;
431 }
432
433 /* check for Instruction-Execution-Protection */
434 if (unlikely(rw == MMU_INST_FETCH && !(*flags & PAGE_EXEC))) {
435 /* IEP sets bit 56 and 61 */
436 *tec |= 0x84;
437 return PGM_PROTECTION;
438 }
439
440 nodat:
441 /* Convert real address -> absolute address */
442 *raddr = mmu_real2abs(env, *raddr);
443
444 mmu_handle_skey(*raddr, rw, flags);
445 return 0;
446 }
447
448 /**
449 * translate_pages: Translate a set of consecutive logical page addresses
450 * to absolute addresses. This function is used for TCG and old KVM without
451 * the MEMOP interface.
452 */
453 static int translate_pages(S390CPU *cpu, vaddr addr, int nr_pages,
454 target_ulong *pages, bool is_write, uint64_t *tec)
455 {
456 uint64_t asc = cpu->env.psw.mask & PSW_MASK_ASC;
457 CPUS390XState *env = &cpu->env;
458 int ret, i, pflags;
459
460 for (i = 0; i < nr_pages; i++) {
461 ret = mmu_translate(env, addr, is_write, asc, &pages[i], &pflags, tec);
462 if (ret) {
463 return ret;
464 }
465 if (!address_space_access_valid(&address_space_memory, pages[i],
466 TARGET_PAGE_SIZE, is_write,
467 MEMTXATTRS_UNSPECIFIED)) {
468 *tec = 0; /* unused */
469 return PGM_ADDRESSING;
470 }
471 addr += TARGET_PAGE_SIZE;
472 }
473
474 return 0;
475 }
476
477 int s390_cpu_pv_mem_rw(S390CPU *cpu, unsigned int offset, void *hostbuf,
478 int len, bool is_write)
479 {
480 int ret;
481
482 if (kvm_enabled()) {
483 ret = kvm_s390_mem_op_pv(cpu, offset, hostbuf, len, is_write);
484 } else {
485 /* Protected Virtualization is a KVM/Hardware only feature */
486 g_assert_not_reached();
487 }
488 return ret;
489 }
490
491 /**
492 * s390_cpu_virt_mem_rw:
493 * @laddr: the logical start address
494 * @ar: the access register number
495 * @hostbuf: buffer in host memory. NULL = do only checks w/o copying
496 * @len: length that should be transferred
497 * @is_write: true = write, false = read
498 * Returns: 0 on success, non-zero if an exception occurred
499 *
500 * Copy from/to guest memory using logical addresses. Note that we inject a
501 * program interrupt in case there is an error while accessing the memory.
502 *
503 * This function will always return (also for TCG), make sure to call
504 * s390_cpu_virt_mem_handle_exc() to properly exit the CPU loop.
505 */
506 int s390_cpu_virt_mem_rw(S390CPU *cpu, vaddr laddr, uint8_t ar, void *hostbuf,
507 int len, bool is_write)
508 {
509 int currlen, nr_pages, i;
510 target_ulong *pages;
511 uint64_t tec;
512 int ret;
513
514 if (kvm_enabled()) {
515 ret = kvm_s390_mem_op(cpu, laddr, ar, hostbuf, len, is_write);
516 if (ret >= 0) {
517 return ret;
518 }
519 }
520
521 nr_pages = (((laddr & ~TARGET_PAGE_MASK) + len - 1) >> TARGET_PAGE_BITS)
522 + 1;
523 pages = g_malloc(nr_pages * sizeof(*pages));
524
525 ret = translate_pages(cpu, laddr, nr_pages, pages, is_write, &tec);
526 if (ret) {
527 trigger_access_exception(&cpu->env, ret, tec);
528 } else if (hostbuf != NULL) {
529 /* Copy data by stepping through the area page by page */
530 for (i = 0; i < nr_pages; i++) {
531 currlen = MIN(len, TARGET_PAGE_SIZE - (laddr % TARGET_PAGE_SIZE));
532 cpu_physical_memory_rw(pages[i] | (laddr & ~TARGET_PAGE_MASK),
533 hostbuf, currlen, is_write);
534 laddr += currlen;
535 hostbuf += currlen;
536 len -= currlen;
537 }
538 }
539
540 g_free(pages);
541 return ret;
542 }
543
544 void s390_cpu_virt_mem_handle_exc(S390CPU *cpu, uintptr_t ra)
545 {
546 /* KVM will handle the interrupt automatically, TCG has to exit the TB */
547 #ifdef CONFIG_TCG
548 if (tcg_enabled()) {
549 cpu_loop_exit_restore(CPU(cpu), ra);
550 }
551 #endif
552 }
553
554 /**
555 * Translate a real address into a physical (absolute) address.
556 * @param raddr the real address
557 * @param rw 0 = read, 1 = write, 2 = code fetch
558 * @param addr the translated address is stored to this pointer
559 * @param flags the PAGE_READ/WRITE/EXEC flags are stored to this pointer
560 * @return 0 = success, != 0, the exception to raise
561 */
562 int mmu_translate_real(CPUS390XState *env, target_ulong raddr, int rw,
563 target_ulong *addr, int *flags, uint64_t *tec)
564 {
565 const bool lowprot_enabled = env->cregs[0] & CR0_LOWPROT;
566
567 *flags = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
568 if (is_low_address(raddr & TARGET_PAGE_MASK) && lowprot_enabled) {
569 /* see comment in mmu_translate() how this works */
570 *flags |= PAGE_WRITE_INV;
571 if (is_low_address(raddr) && rw == MMU_DATA_STORE) {
572 /* LAP sets bit 56 */
573 *tec = (raddr & TARGET_PAGE_MASK) | FS_WRITE | 0x80;
574 return PGM_PROTECTION;
575 }
576 }
577
578 *addr = mmu_real2abs(env, raddr & TARGET_PAGE_MASK);
579
580 mmu_handle_skey(*addr, rw, flags);
581 return 0;
582 }