Merge remote-tracking branch 'remotes/dagrh/tags/pull-virtiofs-20211026' into staging
[qemu.git] / scripts / decodetree.py
1 #!/usr/bin/env python3
2 # Copyright (c) 2018 Linaro Limited
3 #
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2.1 of the License, or (at your option) any later version.
8 #
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
13 #
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, see <http://www.gnu.org/licenses/>.
16 #
17
18 #
19 # Generate a decoding tree from a specification file.
20 # See the syntax and semantics in docs/devel/decodetree.rst.
21 #
22
23 import io
24 import os
25 import re
26 import sys
27 import getopt
28
29 insnwidth = 32
30 bitop_width = 32
31 insnmask = 0xffffffff
32 variablewidth = False
33 fields = {}
34 arguments = {}
35 formats = {}
36 allpatterns = []
37 anyextern = False
38
39 translate_prefix = 'trans'
40 translate_scope = 'static '
41 input_file = ''
42 output_file = None
43 output_fd = None
44 insntype = 'uint32_t'
45 decode_function = 'decode'
46
47 # An identifier for C.
48 re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'
49
50 # Identifiers for Arguments, Fields, Formats and Patterns.
51 re_arg_ident = '&[a-zA-Z0-9_]*'
52 re_fld_ident = '%[a-zA-Z0-9_]*'
53 re_fmt_ident = '@[a-zA-Z0-9_]*'
54 re_pat_ident = '[a-zA-Z0-9_]*'
55
56 def error_with_file(file, lineno, *args):
57 """Print an error message from file:line and args and exit."""
58 global output_file
59 global output_fd
60
61 prefix = ''
62 if file:
63 prefix += f'{file}:'
64 if lineno:
65 prefix += f'{lineno}:'
66 if prefix:
67 prefix += ' '
68 print(prefix, end='error: ', file=sys.stderr)
69 print(*args, file=sys.stderr)
70
71 if output_file and output_fd:
72 output_fd.close()
73 os.remove(output_file)
74 exit(1)
75 # end error_with_file
76
77
78 def error(lineno, *args):
79 error_with_file(input_file, lineno, *args)
80 # end error
81
82
83 def output(*args):
84 global output_fd
85 for a in args:
86 output_fd.write(a)
87
88
89 def output_autogen():
90 output('/* This file is autogenerated by scripts/decodetree.py. */\n\n')
91
92
93 def str_indent(c):
94 """Return a string with C spaces"""
95 return ' ' * c
96
97
98 def str_fields(fields):
99 """Return a string uniquely identifying FIELDS"""
100 r = ''
101 for n in sorted(fields.keys()):
102 r += '_' + n
103 return r[1:]
104
105
106 def whex(val):
107 """Return a hex string for val padded for insnwidth"""
108 global insnwidth
109 return f'0x{val:0{insnwidth // 4}x}'
110
111
112 def whexC(val):
113 """Return a hex string for val padded for insnwidth,
114 and with the proper suffix for a C constant."""
115 suffix = ''
116 if val >= 0x100000000:
117 suffix = 'ull'
118 elif val >= 0x80000000:
119 suffix = 'u'
120 return whex(val) + suffix
121
122
123 def str_match_bits(bits, mask):
124 """Return a string pretty-printing BITS/MASK"""
125 global insnwidth
126
127 i = 1 << (insnwidth - 1)
128 space = 0x01010100
129 r = ''
130 while i != 0:
131 if i & mask:
132 if i & bits:
133 r += '1'
134 else:
135 r += '0'
136 else:
137 r += '.'
138 if i & space:
139 r += ' '
140 i >>= 1
141 return r
142
143
144 def is_pow2(x):
145 """Return true iff X is equal to a power of 2."""
146 return (x & (x - 1)) == 0
147
148
149 def ctz(x):
150 """Return the number of times 2 factors into X."""
151 assert x != 0
152 r = 0
153 while ((x >> r) & 1) == 0:
154 r += 1
155 return r
156
157
158 def is_contiguous(bits):
159 if bits == 0:
160 return -1
161 shift = ctz(bits)
162 if is_pow2((bits >> shift) + 1):
163 return shift
164 else:
165 return -1
166
167
168 def eq_fields_for_args(flds_a, arg):
169 if len(flds_a) != len(arg.fields):
170 return False
171 # Only allow inference on default types
172 for t in arg.types:
173 if t != 'int':
174 return False
175 for k, a in flds_a.items():
176 if k not in arg.fields:
177 return False
178 return True
179
180
181 def eq_fields_for_fmts(flds_a, flds_b):
182 if len(flds_a) != len(flds_b):
183 return False
184 for k, a in flds_a.items():
185 if k not in flds_b:
186 return False
187 b = flds_b[k]
188 if a.__class__ != b.__class__ or a != b:
189 return False
190 return True
191
192
193 class Field:
194 """Class representing a simple instruction field"""
195 def __init__(self, sign, pos, len):
196 self.sign = sign
197 self.pos = pos
198 self.len = len
199 self.mask = ((1 << len) - 1) << pos
200
201 def __str__(self):
202 if self.sign:
203 s = 's'
204 else:
205 s = ''
206 return str(self.pos) + ':' + s + str(self.len)
207
208 def str_extract(self):
209 global bitop_width
210 s = 's' if self.sign else ''
211 return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
212
213 def __eq__(self, other):
214 return self.sign == other.sign and self.mask == other.mask
215
216 def __ne__(self, other):
217 return not self.__eq__(other)
218 # end Field
219
220
221 class MultiField:
222 """Class representing a compound instruction field"""
223 def __init__(self, subs, mask):
224 self.subs = subs
225 self.sign = subs[0].sign
226 self.mask = mask
227
228 def __str__(self):
229 return str(self.subs)
230
231 def str_extract(self):
232 global bitop_width
233 ret = '0'
234 pos = 0
235 for f in reversed(self.subs):
236 ext = f.str_extract()
237 if pos == 0:
238 ret = ext
239 else:
240 ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})'
241 pos += f.len
242 return ret
243
244 def __ne__(self, other):
245 if len(self.subs) != len(other.subs):
246 return True
247 for a, b in zip(self.subs, other.subs):
248 if a.__class__ != b.__class__ or a != b:
249 return True
250 return False
251
252 def __eq__(self, other):
253 return not self.__ne__(other)
254 # end MultiField
255
256
257 class ConstField:
258 """Class representing an argument field with constant value"""
259 def __init__(self, value):
260 self.value = value
261 self.mask = 0
262 self.sign = value < 0
263
264 def __str__(self):
265 return str(self.value)
266
267 def str_extract(self):
268 return str(self.value)
269
270 def __cmp__(self, other):
271 return self.value - other.value
272 # end ConstField
273
274
275 class FunctionField:
276 """Class representing a field passed through a function"""
277 def __init__(self, func, base):
278 self.mask = base.mask
279 self.sign = base.sign
280 self.base = base
281 self.func = func
282
283 def __str__(self):
284 return self.func + '(' + str(self.base) + ')'
285
286 def str_extract(self):
287 return self.func + '(ctx, ' + self.base.str_extract() + ')'
288
289 def __eq__(self, other):
290 return self.func == other.func and self.base == other.base
291
292 def __ne__(self, other):
293 return not self.__eq__(other)
294 # end FunctionField
295
296
297 class ParameterField:
298 """Class representing a pseudo-field read from a function"""
299 def __init__(self, func):
300 self.mask = 0
301 self.sign = 0
302 self.func = func
303
304 def __str__(self):
305 return self.func
306
307 def str_extract(self):
308 return self.func + '(ctx)'
309
310 def __eq__(self, other):
311 return self.func == other.func
312
313 def __ne__(self, other):
314 return not self.__eq__(other)
315 # end ParameterField
316
317
318 class Arguments:
319 """Class representing the extracted fields of a format"""
320 def __init__(self, nm, flds, types, extern):
321 self.name = nm
322 self.extern = extern
323 self.fields = flds
324 self.types = types
325
326 def __str__(self):
327 return self.name + ' ' + str(self.fields)
328
329 def struct_name(self):
330 return 'arg_' + self.name
331
332 def output_def(self):
333 if not self.extern:
334 output('typedef struct {\n')
335 for (n, t) in zip(self.fields, self.types):
336 output(f' {t} {n};\n')
337 output('} ', self.struct_name(), ';\n\n')
338 # end Arguments
339
340
341 class General:
342 """Common code between instruction formats and instruction patterns"""
343 def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
344 self.name = name
345 self.file = input_file
346 self.lineno = lineno
347 self.base = base
348 self.fixedbits = fixb
349 self.fixedmask = fixm
350 self.undefmask = udfm
351 self.fieldmask = fldm
352 self.fields = flds
353 self.width = w
354
355 def __str__(self):
356 return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
357
358 def str1(self, i):
359 return str_indent(i) + self.__str__()
360 # end General
361
362
363 class Format(General):
364 """Class representing an instruction format"""
365
366 def extract_name(self):
367 global decode_function
368 return decode_function + '_extract_' + self.name
369
370 def output_extract(self):
371 output('static void ', self.extract_name(), '(DisasContext *ctx, ',
372 self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
373 for n, f in self.fields.items():
374 output(' a->', n, ' = ', f.str_extract(), ';\n')
375 output('}\n\n')
376 # end Format
377
378
379 class Pattern(General):
380 """Class representing an instruction pattern"""
381
382 def output_decl(self):
383 global translate_scope
384 global translate_prefix
385 output('typedef ', self.base.base.struct_name(),
386 ' arg_', self.name, ';\n')
387 output(translate_scope, 'bool ', translate_prefix, '_', self.name,
388 '(DisasContext *ctx, arg_', self.name, ' *a);\n')
389
390 def output_code(self, i, extracted, outerbits, outermask):
391 global translate_prefix
392 ind = str_indent(i)
393 arg = self.base.base.name
394 output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
395 if not extracted:
396 output(ind, self.base.extract_name(),
397 '(ctx, &u.f_', arg, ', insn);\n')
398 for n, f in self.fields.items():
399 output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
400 output(ind, 'if (', translate_prefix, '_', self.name,
401 '(ctx, &u.f_', arg, ')) return true;\n')
402
403 # Normal patterns do not have children.
404 def build_tree(self):
405 return
406 def prop_masks(self):
407 return
408 def prop_format(self):
409 return
410 def prop_width(self):
411 return
412
413 # end Pattern
414
415
416 class MultiPattern(General):
417 """Class representing a set of instruction patterns"""
418
419 def __init__(self, lineno):
420 self.file = input_file
421 self.lineno = lineno
422 self.pats = []
423 self.base = None
424 self.fixedbits = 0
425 self.fixedmask = 0
426 self.undefmask = 0
427 self.width = None
428
429 def __str__(self):
430 r = 'group'
431 if self.fixedbits is not None:
432 r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
433 return r
434
435 def output_decl(self):
436 for p in self.pats:
437 p.output_decl()
438
439 def prop_masks(self):
440 global insnmask
441
442 fixedmask = insnmask
443 undefmask = insnmask
444
445 # Collect fixedmask/undefmask for all of the children.
446 for p in self.pats:
447 p.prop_masks()
448 fixedmask &= p.fixedmask
449 undefmask &= p.undefmask
450
451 # Widen fixedmask until all fixedbits match
452 repeat = True
453 fixedbits = 0
454 while repeat and fixedmask != 0:
455 fixedbits = None
456 for p in self.pats:
457 thisbits = p.fixedbits & fixedmask
458 if fixedbits is None:
459 fixedbits = thisbits
460 elif fixedbits != thisbits:
461 fixedmask &= ~(fixedbits ^ thisbits)
462 break
463 else:
464 repeat = False
465
466 self.fixedbits = fixedbits
467 self.fixedmask = fixedmask
468 self.undefmask = undefmask
469
470 def build_tree(self):
471 for p in self.pats:
472 p.build_tree()
473
474 def prop_format(self):
475 for p in self.pats:
476 p.build_tree()
477
478 def prop_width(self):
479 width = None
480 for p in self.pats:
481 p.prop_width()
482 if width is None:
483 width = p.width
484 elif width != p.width:
485 error_with_file(self.file, self.lineno,
486 'width mismatch in patterns within braces')
487 self.width = width
488
489 # end MultiPattern
490
491
492 class IncMultiPattern(MultiPattern):
493 """Class representing an overlapping set of instruction patterns"""
494
495 def output_code(self, i, extracted, outerbits, outermask):
496 global translate_prefix
497 ind = str_indent(i)
498 for p in self.pats:
499 if outermask != p.fixedmask:
500 innermask = p.fixedmask & ~outermask
501 innerbits = p.fixedbits & ~outermask
502 output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
503 output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
504 p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
505 output(ind, '}\n')
506 else:
507 p.output_code(i, extracted, p.fixedbits, p.fixedmask)
508 #end IncMultiPattern
509
510
511 class Tree:
512 """Class representing a node in a decode tree"""
513
514 def __init__(self, fm, tm):
515 self.fixedmask = fm
516 self.thismask = tm
517 self.subs = []
518 self.base = None
519
520 def str1(self, i):
521 ind = str_indent(i)
522 r = ind + whex(self.fixedmask)
523 if self.format:
524 r += ' ' + self.format.name
525 r += ' [\n'
526 for (b, s) in self.subs:
527 r += ind + f' {whex(b)}:\n'
528 r += s.str1(i + 4) + '\n'
529 r += ind + ']'
530 return r
531
532 def __str__(self):
533 return self.str1(0)
534
535 def output_code(self, i, extracted, outerbits, outermask):
536 ind = str_indent(i)
537
538 # If we identified all nodes below have the same format,
539 # extract the fields now.
540 if not extracted and self.base:
541 output(ind, self.base.extract_name(),
542 '(ctx, &u.f_', self.base.base.name, ', insn);\n')
543 extracted = True
544
545 # Attempt to aid the compiler in producing compact switch statements.
546 # If the bits in the mask are contiguous, extract them.
547 sh = is_contiguous(self.thismask)
548 if sh > 0:
549 # Propagate SH down into the local functions.
550 def str_switch(b, sh=sh):
551 return f'(insn >> {sh}) & {b >> sh:#x}'
552
553 def str_case(b, sh=sh):
554 return hex(b >> sh)
555 else:
556 def str_switch(b):
557 return f'insn & {whexC(b)}'
558
559 def str_case(b):
560 return whexC(b)
561
562 output(ind, 'switch (', str_switch(self.thismask), ') {\n')
563 for b, s in sorted(self.subs):
564 assert (self.thismask & ~s.fixedmask) == 0
565 innermask = outermask | self.thismask
566 innerbits = outerbits | b
567 output(ind, 'case ', str_case(b), ':\n')
568 output(ind, ' /* ',
569 str_match_bits(innerbits, innermask), ' */\n')
570 s.output_code(i + 4, extracted, innerbits, innermask)
571 output(ind, ' break;\n')
572 output(ind, '}\n')
573 # end Tree
574
575
576 class ExcMultiPattern(MultiPattern):
577 """Class representing a non-overlapping set of instruction patterns"""
578
579 def output_code(self, i, extracted, outerbits, outermask):
580 # Defer everything to our decomposed Tree node
581 self.tree.output_code(i, extracted, outerbits, outermask)
582
583 @staticmethod
584 def __build_tree(pats, outerbits, outermask):
585 # Find the intersection of all remaining fixedmask.
586 innermask = ~outermask & insnmask
587 for i in pats:
588 innermask &= i.fixedmask
589
590 if innermask == 0:
591 # Edge condition: One pattern covers the entire insnmask
592 if len(pats) == 1:
593 t = Tree(outermask, innermask)
594 t.subs.append((0, pats[0]))
595 return t
596
597 text = 'overlapping patterns:'
598 for p in pats:
599 text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
600 error_with_file(pats[0].file, pats[0].lineno, text)
601
602 fullmask = outermask | innermask
603
604 # Sort each element of pats into the bin selected by the mask.
605 bins = {}
606 for i in pats:
607 fb = i.fixedbits & innermask
608 if fb in bins:
609 bins[fb].append(i)
610 else:
611 bins[fb] = [i]
612
613 # We must recurse if any bin has more than one element or if
614 # the single element in the bin has not been fully matched.
615 t = Tree(fullmask, innermask)
616
617 for b, l in bins.items():
618 s = l[0]
619 if len(l) > 1 or s.fixedmask & ~fullmask != 0:
620 s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
621 t.subs.append((b, s))
622
623 return t
624
625 def build_tree(self):
626 super().prop_format()
627 self.tree = self.__build_tree(self.pats, self.fixedbits,
628 self.fixedmask)
629
630 @staticmethod
631 def __prop_format(tree):
632 """Propagate Format objects into the decode tree"""
633
634 # Depth first search.
635 for (b, s) in tree.subs:
636 if isinstance(s, Tree):
637 ExcMultiPattern.__prop_format(s)
638
639 # If all entries in SUBS have the same format, then
640 # propagate that into the tree.
641 f = None
642 for (b, s) in tree.subs:
643 if f is None:
644 f = s.base
645 if f is None:
646 return
647 if f is not s.base:
648 return
649 tree.base = f
650
651 def prop_format(self):
652 super().prop_format()
653 self.__prop_format(self.tree)
654
655 # end ExcMultiPattern
656
657
658 def parse_field(lineno, name, toks):
659 """Parse one instruction field from TOKS at LINENO"""
660 global fields
661 global insnwidth
662
663 # A "simple" field will have only one entry;
664 # a "multifield" will have several.
665 subs = []
666 width = 0
667 func = None
668 for t in toks:
669 if re.match('^!function=', t):
670 if func:
671 error(lineno, 'duplicate function')
672 func = t.split('=')
673 func = func[1]
674 continue
675
676 if re.fullmatch('[0-9]+:s[0-9]+', t):
677 # Signed field extract
678 subtoks = t.split(':s')
679 sign = True
680 elif re.fullmatch('[0-9]+:[0-9]+', t):
681 # Unsigned field extract
682 subtoks = t.split(':')
683 sign = False
684 else:
685 error(lineno, f'invalid field token "{t}"')
686 po = int(subtoks[0])
687 le = int(subtoks[1])
688 if po + le > insnwidth:
689 error(lineno, f'field {t} too large')
690 f = Field(sign, po, le)
691 subs.append(f)
692 width += le
693
694 if width > insnwidth:
695 error(lineno, 'field too large')
696 if len(subs) == 0:
697 if func:
698 f = ParameterField(func)
699 else:
700 error(lineno, 'field with no value')
701 else:
702 if len(subs) == 1:
703 f = subs[0]
704 else:
705 mask = 0
706 for s in subs:
707 if mask & s.mask:
708 error(lineno, 'field components overlap')
709 mask |= s.mask
710 f = MultiField(subs, mask)
711 if func:
712 f = FunctionField(func, f)
713
714 if name in fields:
715 error(lineno, 'duplicate field', name)
716 fields[name] = f
717 # end parse_field
718
719
720 def parse_arguments(lineno, name, toks):
721 """Parse one argument set from TOKS at LINENO"""
722 global arguments
723 global re_C_ident
724 global anyextern
725
726 flds = []
727 types = []
728 extern = False
729 for n in toks:
730 if re.fullmatch('!extern', n):
731 extern = True
732 anyextern = True
733 continue
734 if re.fullmatch(re_C_ident + ':' + re_C_ident, n):
735 (n, t) = n.split(':')
736 elif re.fullmatch(re_C_ident, n):
737 t = 'int'
738 else:
739 error(lineno, f'invalid argument set token "{n}"')
740 if n in flds:
741 error(lineno, f'duplicate argument "{n}"')
742 flds.append(n)
743 types.append(t)
744
745 if name in arguments:
746 error(lineno, 'duplicate argument set', name)
747 arguments[name] = Arguments(name, flds, types, extern)
748 # end parse_arguments
749
750
751 def lookup_field(lineno, name):
752 global fields
753 if name in fields:
754 return fields[name]
755 error(lineno, 'undefined field', name)
756
757
758 def add_field(lineno, flds, new_name, f):
759 if new_name in flds:
760 error(lineno, 'duplicate field', new_name)
761 flds[new_name] = f
762 return flds
763
764
765 def add_field_byname(lineno, flds, new_name, old_name):
766 return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
767
768
769 def infer_argument_set(flds):
770 global arguments
771 global decode_function
772
773 for arg in arguments.values():
774 if eq_fields_for_args(flds, arg):
775 return arg
776
777 name = decode_function + str(len(arguments))
778 arg = Arguments(name, flds.keys(), ['int'] * len(flds), False)
779 arguments[name] = arg
780 return arg
781
782
783 def infer_format(arg, fieldmask, flds, width):
784 global arguments
785 global formats
786 global decode_function
787
788 const_flds = {}
789 var_flds = {}
790 for n, c in flds.items():
791 if c is ConstField:
792 const_flds[n] = c
793 else:
794 var_flds[n] = c
795
796 # Look for an existing format with the same argument set and fields
797 for fmt in formats.values():
798 if arg and fmt.base != arg:
799 continue
800 if fieldmask != fmt.fieldmask:
801 continue
802 if width != fmt.width:
803 continue
804 if not eq_fields_for_fmts(flds, fmt.fields):
805 continue
806 return (fmt, const_flds)
807
808 name = decode_function + '_Fmt_' + str(len(formats))
809 if not arg:
810 arg = infer_argument_set(flds)
811
812 fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
813 formats[name] = fmt
814
815 return (fmt, const_flds)
816 # end infer_format
817
818
819 def parse_generic(lineno, parent_pat, name, toks):
820 """Parse one instruction format from TOKS at LINENO"""
821 global fields
822 global arguments
823 global formats
824 global allpatterns
825 global re_arg_ident
826 global re_fld_ident
827 global re_fmt_ident
828 global re_C_ident
829 global insnwidth
830 global insnmask
831 global variablewidth
832
833 is_format = parent_pat is None
834
835 fixedmask = 0
836 fixedbits = 0
837 undefmask = 0
838 width = 0
839 flds = {}
840 arg = None
841 fmt = None
842 for t in toks:
843 # '&Foo' gives a format an explicit argument set.
844 if re.fullmatch(re_arg_ident, t):
845 tt = t[1:]
846 if arg:
847 error(lineno, 'multiple argument sets')
848 if tt in arguments:
849 arg = arguments[tt]
850 else:
851 error(lineno, 'undefined argument set', t)
852 continue
853
854 # '@Foo' gives a pattern an explicit format.
855 if re.fullmatch(re_fmt_ident, t):
856 tt = t[1:]
857 if fmt:
858 error(lineno, 'multiple formats')
859 if tt in formats:
860 fmt = formats[tt]
861 else:
862 error(lineno, 'undefined format', t)
863 continue
864
865 # '%Foo' imports a field.
866 if re.fullmatch(re_fld_ident, t):
867 tt = t[1:]
868 flds = add_field_byname(lineno, flds, tt, tt)
869 continue
870
871 # 'Foo=%Bar' imports a field with a different name.
872 if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
873 (fname, iname) = t.split('=%')
874 flds = add_field_byname(lineno, flds, fname, iname)
875 continue
876
877 # 'Foo=number' sets an argument field to a constant value
878 if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
879 (fname, value) = t.split('=')
880 value = int(value)
881 flds = add_field(lineno, flds, fname, ConstField(value))
882 continue
883
884 # Pattern of 0s, 1s, dots and dashes indicate required zeros,
885 # required ones, or dont-cares.
886 if re.fullmatch('[01.-]+', t):
887 shift = len(t)
888 fms = t.replace('0', '1')
889 fms = fms.replace('.', '0')
890 fms = fms.replace('-', '0')
891 fbs = t.replace('.', '0')
892 fbs = fbs.replace('-', '0')
893 ubm = t.replace('1', '0')
894 ubm = ubm.replace('.', '0')
895 ubm = ubm.replace('-', '1')
896 fms = int(fms, 2)
897 fbs = int(fbs, 2)
898 ubm = int(ubm, 2)
899 fixedbits = (fixedbits << shift) | fbs
900 fixedmask = (fixedmask << shift) | fms
901 undefmask = (undefmask << shift) | ubm
902 # Otherwise, fieldname:fieldwidth
903 elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
904 (fname, flen) = t.split(':')
905 sign = False
906 if flen[0] == 's':
907 sign = True
908 flen = flen[1:]
909 shift = int(flen, 10)
910 if shift + width > insnwidth:
911 error(lineno, f'field {fname} exceeds insnwidth')
912 f = Field(sign, insnwidth - width - shift, shift)
913 flds = add_field(lineno, flds, fname, f)
914 fixedbits <<= shift
915 fixedmask <<= shift
916 undefmask <<= shift
917 else:
918 error(lineno, f'invalid token "{t}"')
919 width += shift
920
921 if variablewidth and width < insnwidth and width % 8 == 0:
922 shift = insnwidth - width
923 fixedbits <<= shift
924 fixedmask <<= shift
925 undefmask <<= shift
926 undefmask |= (1 << shift) - 1
927
928 # We should have filled in all of the bits of the instruction.
929 elif not (is_format and width == 0) and width != insnwidth:
930 error(lineno, f'definition has {width} bits')
931
932 # Do not check for fields overlapping fields; one valid usage
933 # is to be able to duplicate fields via import.
934 fieldmask = 0
935 for f in flds.values():
936 fieldmask |= f.mask
937
938 # Fix up what we've parsed to match either a format or a pattern.
939 if is_format:
940 # Formats cannot reference formats.
941 if fmt:
942 error(lineno, 'format referencing format')
943 # If an argument set is given, then there should be no fields
944 # without a place to store it.
945 if arg:
946 for f in flds.keys():
947 if f not in arg.fields:
948 error(lineno, f'field {f} not in argument set {arg.name}')
949 else:
950 arg = infer_argument_set(flds)
951 if name in formats:
952 error(lineno, 'duplicate format name', name)
953 fmt = Format(name, lineno, arg, fixedbits, fixedmask,
954 undefmask, fieldmask, flds, width)
955 formats[name] = fmt
956 else:
957 # Patterns can reference a format ...
958 if fmt:
959 # ... but not an argument simultaneously
960 if arg:
961 error(lineno, 'pattern specifies both format and argument set')
962 if fixedmask & fmt.fixedmask:
963 error(lineno, 'pattern fixed bits overlap format fixed bits')
964 if width != fmt.width:
965 error(lineno, 'pattern uses format of different width')
966 fieldmask |= fmt.fieldmask
967 fixedbits |= fmt.fixedbits
968 fixedmask |= fmt.fixedmask
969 undefmask |= fmt.undefmask
970 else:
971 (fmt, flds) = infer_format(arg, fieldmask, flds, width)
972 arg = fmt.base
973 for f in flds.keys():
974 if f not in arg.fields:
975 error(lineno, f'field {f} not in argument set {arg.name}')
976 if f in fmt.fields.keys():
977 error(lineno, f'field {f} set by format and pattern')
978 for f in arg.fields:
979 if f not in flds.keys() and f not in fmt.fields.keys():
980 error(lineno, f'field {f} not initialized')
981 pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
982 undefmask, fieldmask, flds, width)
983 parent_pat.pats.append(pat)
984 allpatterns.append(pat)
985
986 # Validate the masks that we have assembled.
987 if fieldmask & fixedmask:
988 error(lineno, 'fieldmask overlaps fixedmask ',
989 f'({whex(fieldmask)} & {whex(fixedmask)})')
990 if fieldmask & undefmask:
991 error(lineno, 'fieldmask overlaps undefmask ',
992 f'({whex(fieldmask)} & {whex(undefmask)})')
993 if fixedmask & undefmask:
994 error(lineno, 'fixedmask overlaps undefmask ',
995 f'({whex(fixedmask)} & {whex(undefmask)})')
996 if not is_format:
997 allbits = fieldmask | fixedmask | undefmask
998 if allbits != insnmask:
999 error(lineno, 'bits left unspecified ',
1000 f'({whex(allbits ^ insnmask)})')
1001 # end parse_general
1002
1003
1004 def parse_file(f, parent_pat):
1005 """Parse all of the patterns within a file"""
1006 global re_arg_ident
1007 global re_fld_ident
1008 global re_fmt_ident
1009 global re_pat_ident
1010
1011 # Read all of the lines of the file. Concatenate lines
1012 # ending in backslash; discard empty lines and comments.
1013 toks = []
1014 lineno = 0
1015 nesting = 0
1016 nesting_pats = []
1017
1018 for line in f:
1019 lineno += 1
1020
1021 # Expand and strip spaces, to find indent.
1022 line = line.rstrip()
1023 line = line.expandtabs()
1024 len1 = len(line)
1025 line = line.lstrip()
1026 len2 = len(line)
1027
1028 # Discard comments
1029 end = line.find('#')
1030 if end >= 0:
1031 line = line[:end]
1032
1033 t = line.split()
1034 if len(toks) != 0:
1035 # Next line after continuation
1036 toks.extend(t)
1037 else:
1038 # Allow completely blank lines.
1039 if len1 == 0:
1040 continue
1041 indent = len1 - len2
1042 # Empty line due to comment.
1043 if len(t) == 0:
1044 # Indentation must be correct, even for comment lines.
1045 if indent != nesting:
1046 error(lineno, 'indentation ', indent, ' != ', nesting)
1047 continue
1048 start_lineno = lineno
1049 toks = t
1050
1051 # Continuation?
1052 if toks[-1] == '\\':
1053 toks.pop()
1054 continue
1055
1056 name = toks[0]
1057 del toks[0]
1058
1059 # End nesting?
1060 if name == '}' or name == ']':
1061 if len(toks) != 0:
1062 error(start_lineno, 'extra tokens after close brace')
1063
1064 # Make sure { } and [ ] nest properly.
1065 if (name == '}') != isinstance(parent_pat, IncMultiPattern):
1066 error(lineno, 'mismatched close brace')
1067
1068 try:
1069 parent_pat = nesting_pats.pop()
1070 except:
1071 error(lineno, 'extra close brace')
1072
1073 nesting -= 2
1074 if indent != nesting:
1075 error(lineno, 'indentation ', indent, ' != ', nesting)
1076
1077 toks = []
1078 continue
1079
1080 # Everything else should have current indentation.
1081 if indent != nesting:
1082 error(start_lineno, 'indentation ', indent, ' != ', nesting)
1083
1084 # Start nesting?
1085 if name == '{' or name == '[':
1086 if len(toks) != 0:
1087 error(start_lineno, 'extra tokens after open brace')
1088
1089 if name == '{':
1090 nested_pat = IncMultiPattern(start_lineno)
1091 else:
1092 nested_pat = ExcMultiPattern(start_lineno)
1093 parent_pat.pats.append(nested_pat)
1094 nesting_pats.append(parent_pat)
1095 parent_pat = nested_pat
1096
1097 nesting += 2
1098 toks = []
1099 continue
1100
1101 # Determine the type of object needing to be parsed.
1102 if re.fullmatch(re_fld_ident, name):
1103 parse_field(start_lineno, name[1:], toks)
1104 elif re.fullmatch(re_arg_ident, name):
1105 parse_arguments(start_lineno, name[1:], toks)
1106 elif re.fullmatch(re_fmt_ident, name):
1107 parse_generic(start_lineno, None, name[1:], toks)
1108 elif re.fullmatch(re_pat_ident, name):
1109 parse_generic(start_lineno, parent_pat, name, toks)
1110 else:
1111 error(lineno, f'invalid token "{name}"')
1112 toks = []
1113
1114 if nesting != 0:
1115 error(lineno, 'missing close brace')
1116 # end parse_file
1117
1118
1119 class SizeTree:
1120 """Class representing a node in a size decode tree"""
1121
1122 def __init__(self, m, w):
1123 self.mask = m
1124 self.subs = []
1125 self.base = None
1126 self.width = w
1127
1128 def str1(self, i):
1129 ind = str_indent(i)
1130 r = ind + whex(self.mask) + ' [\n'
1131 for (b, s) in self.subs:
1132 r += ind + f' {whex(b)}:\n'
1133 r += s.str1(i + 4) + '\n'
1134 r += ind + ']'
1135 return r
1136
1137 def __str__(self):
1138 return self.str1(0)
1139
1140 def output_code(self, i, extracted, outerbits, outermask):
1141 ind = str_indent(i)
1142
1143 # If we need to load more bytes to test, do so now.
1144 if extracted < self.width:
1145 output(ind, f'insn = {decode_function}_load_bytes',
1146 f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
1147 extracted = self.width
1148
1149 # Attempt to aid the compiler in producing compact switch statements.
1150 # If the bits in the mask are contiguous, extract them.
1151 sh = is_contiguous(self.mask)
1152 if sh > 0:
1153 # Propagate SH down into the local functions.
1154 def str_switch(b, sh=sh):
1155 return f'(insn >> {sh}) & {b >> sh:#x}'
1156
1157 def str_case(b, sh=sh):
1158 return hex(b >> sh)
1159 else:
1160 def str_switch(b):
1161 return f'insn & {whexC(b)}'
1162
1163 def str_case(b):
1164 return whexC(b)
1165
1166 output(ind, 'switch (', str_switch(self.mask), ') {\n')
1167 for b, s in sorted(self.subs):
1168 innermask = outermask | self.mask
1169 innerbits = outerbits | b
1170 output(ind, 'case ', str_case(b), ':\n')
1171 output(ind, ' /* ',
1172 str_match_bits(innerbits, innermask), ' */\n')
1173 s.output_code(i + 4, extracted, innerbits, innermask)
1174 output(ind, '}\n')
1175 output(ind, 'return insn;\n')
1176 # end SizeTree
1177
1178 class SizeLeaf:
1179 """Class representing a leaf node in a size decode tree"""
1180
1181 def __init__(self, m, w):
1182 self.mask = m
1183 self.width = w
1184
1185 def str1(self, i):
1186 return str_indent(i) + whex(self.mask)
1187
1188 def __str__(self):
1189 return self.str1(0)
1190
1191 def output_code(self, i, extracted, outerbits, outermask):
1192 global decode_function
1193 ind = str_indent(i)
1194
1195 # If we need to load more bytes, do so now.
1196 if extracted < self.width:
1197 output(ind, f'insn = {decode_function}_load_bytes',
1198 f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
1199 extracted = self.width
1200 output(ind, 'return insn;\n')
1201 # end SizeLeaf
1202
1203
1204 def build_size_tree(pats, width, outerbits, outermask):
1205 global insnwidth
1206
1207 # Collect the mask of bits that are fixed in this width
1208 innermask = 0xff << (insnwidth - width)
1209 innermask &= ~outermask
1210 minwidth = None
1211 onewidth = True
1212 for i in pats:
1213 innermask &= i.fixedmask
1214 if minwidth is None:
1215 minwidth = i.width
1216 elif minwidth != i.width:
1217 onewidth = False;
1218 if minwidth < i.width:
1219 minwidth = i.width
1220
1221 if onewidth:
1222 return SizeLeaf(innermask, minwidth)
1223
1224 if innermask == 0:
1225 if width < minwidth:
1226 return build_size_tree(pats, width + 8, outerbits, outermask)
1227
1228 pnames = []
1229 for p in pats:
1230 pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
1231 error_with_file(pats[0].file, pats[0].lineno,
1232 f'overlapping patterns size {width}:', pnames)
1233
1234 bins = {}
1235 for i in pats:
1236 fb = i.fixedbits & innermask
1237 if fb in bins:
1238 bins[fb].append(i)
1239 else:
1240 bins[fb] = [i]
1241
1242 fullmask = outermask | innermask
1243 lens = sorted(bins.keys())
1244 if len(lens) == 1:
1245 b = lens[0]
1246 return build_size_tree(bins[b], width + 8, b | outerbits, fullmask)
1247
1248 r = SizeTree(innermask, width)
1249 for b, l in bins.items():
1250 s = build_size_tree(l, width, b | outerbits, fullmask)
1251 r.subs.append((b, s))
1252 return r
1253 # end build_size_tree
1254
1255
1256 def prop_size(tree):
1257 """Propagate minimum widths up the decode size tree"""
1258
1259 if isinstance(tree, SizeTree):
1260 min = None
1261 for (b, s) in tree.subs:
1262 width = prop_size(s)
1263 if min is None or min > width:
1264 min = width
1265 assert min >= tree.width
1266 tree.width = min
1267 else:
1268 min = tree.width
1269 return min
1270 # end prop_size
1271
1272
1273 def main():
1274 global arguments
1275 global formats
1276 global allpatterns
1277 global translate_scope
1278 global translate_prefix
1279 global output_fd
1280 global output_file
1281 global input_file
1282 global insnwidth
1283 global insntype
1284 global insnmask
1285 global decode_function
1286 global bitop_width
1287 global variablewidth
1288 global anyextern
1289
1290 decode_scope = 'static '
1291
1292 long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
1293 'static-decode=', 'varinsnwidth=']
1294 try:
1295 (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
1296 except getopt.GetoptError as err:
1297 error(0, err)
1298 for o, a in opts:
1299 if o in ('-o', '--output'):
1300 output_file = a
1301 elif o == '--decode':
1302 decode_function = a
1303 decode_scope = ''
1304 elif o == '--static-decode':
1305 decode_function = a
1306 elif o == '--translate':
1307 translate_prefix = a
1308 translate_scope = ''
1309 elif o in ('-w', '--insnwidth', '--varinsnwidth'):
1310 if o == '--varinsnwidth':
1311 variablewidth = True
1312 insnwidth = int(a)
1313 if insnwidth == 16:
1314 insntype = 'uint16_t'
1315 insnmask = 0xffff
1316 elif insnwidth == 64:
1317 insntype = 'uint64_t'
1318 insnmask = 0xffffffffffffffff
1319 bitop_width = 64
1320 elif insnwidth != 32:
1321 error(0, 'cannot handle insns of width', insnwidth)
1322 else:
1323 assert False, 'unhandled option'
1324
1325 if len(args) < 1:
1326 error(0, 'missing input file')
1327
1328 toppat = ExcMultiPattern(0)
1329
1330 for filename in args:
1331 input_file = filename
1332 f = open(filename, 'rt', encoding='utf-8')
1333 parse_file(f, toppat)
1334 f.close()
1335
1336 # We do not want to compute masks for toppat, because those masks
1337 # are used as a starting point for build_tree. For toppat, we must
1338 # insist that decode begins from naught.
1339 for i in toppat.pats:
1340 i.prop_masks()
1341
1342 toppat.build_tree()
1343 toppat.prop_format()
1344
1345 if variablewidth:
1346 for i in toppat.pats:
1347 i.prop_width()
1348 stree = build_size_tree(toppat.pats, 8, 0, 0)
1349 prop_size(stree)
1350
1351 if output_file:
1352 output_fd = open(output_file, 'wt', encoding='utf-8')
1353 else:
1354 output_fd = io.TextIOWrapper(sys.stdout.buffer,
1355 encoding=sys.stdout.encoding,
1356 errors="ignore")
1357
1358 output_autogen()
1359 for n in sorted(arguments.keys()):
1360 f = arguments[n]
1361 f.output_def()
1362
1363 # A single translate function can be invoked for different patterns.
1364 # Make sure that the argument sets are the same, and declare the
1365 # function only once.
1366 #
1367 # If we're sharing formats, we're likely also sharing trans_* functions,
1368 # but we can't tell which ones. Prevent issues from the compiler by
1369 # suppressing redundant declaration warnings.
1370 if anyextern:
1371 output("#pragma GCC diagnostic push\n",
1372 "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
1373 "#ifdef __clang__\n"
1374 "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
1375 "#endif\n\n")
1376
1377 out_pats = {}
1378 for i in allpatterns:
1379 if i.name in out_pats:
1380 p = out_pats[i.name]
1381 if i.base.base != p.base.base:
1382 error(0, i.name, ' has conflicting argument sets')
1383 else:
1384 i.output_decl()
1385 out_pats[i.name] = i
1386 output('\n')
1387
1388 if anyextern:
1389 output("#pragma GCC diagnostic pop\n\n")
1390
1391 for n in sorted(formats.keys()):
1392 f = formats[n]
1393 f.output_extract()
1394
1395 output(decode_scope, 'bool ', decode_function,
1396 '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
1397
1398 i4 = str_indent(4)
1399
1400 if len(allpatterns) != 0:
1401 output(i4, 'union {\n')
1402 for n in sorted(arguments.keys()):
1403 f = arguments[n]
1404 output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
1405 output(i4, '} u;\n\n')
1406 toppat.output_code(4, False, 0, 0)
1407
1408 output(i4, 'return false;\n')
1409 output('}\n')
1410
1411 if variablewidth:
1412 output('\n', decode_scope, insntype, ' ', decode_function,
1413 '_load(DisasContext *ctx)\n{\n',
1414 ' ', insntype, ' insn = 0;\n\n')
1415 stree.output_code(4, 0, 0, 0)
1416 output('}\n')
1417
1418 if output_file:
1419 output_fd.close()
1420 # end main
1421
1422
1423 if __name__ == '__main__':
1424 main()