fuzz: use qemu_get_exec_dir
[qemu.git] / scripts / decodetree.py
1 #!/usr/bin/env python3
2 # Copyright (c) 2018 Linaro Limited
3 #
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2 of the License, or (at your option) any later version.
8 #
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
13 #
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, see <http://www.gnu.org/licenses/>.
16 #
17
18 #
19 # Generate a decoding tree from a specification file.
20 # See the syntax and semantics in docs/devel/decodetree.rst.
21 #
22
23 import os
24 import re
25 import sys
26 import getopt
27
# Width, in bits, of one instruction word, and the matching all-ones mask.
insnwidth = 32
insnmask = 0xffffffff
# When true, parse_generic() accepts definitions narrower than insnwidth
# (in whole bytes) and shifts them up to the top of the instruction word.
variablewidth = False
# Parsed objects, keyed by name.  Filled in by parse_field(),
# parse_arguments()/infer_argument_set() and parse_generic()/infer_format().
fields = {}
arguments = {}
formats = {}
# Every Pattern created by parse_generic(), in the order parsed.
allpatterns = []
# Set once any argument set is declared with '!extern'.
anyextern = False

# Prefix and linkage used for the emitted translator declarations.
translate_prefix = 'trans'
translate_scope = 'static '
# File name reported in error messages and recorded on parsed objects.
input_file = ''
# Output path and open handle.  error_with_file() closes and removes the
# output when both are set.
output_file = None
output_fd = None
# C type of the 'insn' parameter of the generated extract functions.
insntype = 'uint32_t'
# Base name for generated function, format and argument-set names.
decode_function = 'decode'

# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'

# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'
53
def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    FILE and LINENO are each left out of the prefix when false.  If an
    output file has already been opened it is closed and removed, so
    that no partial result is left behind.  Always exits with status 1.
    """
    global output_file
    global output_fd

    prefix = ''
    if file:
        prefix += '{0}:'.format(file)
    if lineno:
        prefix += '{0}:'.format(lineno)
    if prefix:
        prefix += ' '
    print(prefix, end='error: ', file=sys.stderr)
    print(*args, file=sys.stderr)

    if output_file and output_fd:
        output_fd.close()
        os.remove(output_file)
    # The bare exit() builtin is injected by the site module and is not
    # available when the interpreter is started with -S; sys.exit()
    # raises the same SystemExit unconditionally.
    sys.exit(1)
# end error_with_file
74
75
def error(lineno, *args):
    """Report an error at LINENO of the current input file and exit."""
    error_with_file(input_file, lineno, *args)
# end error
79
80
def output(*args):
    """Write each string in ARGS, in order, to the open output file."""
    for text in args:
        output_fd.write(text)
85
86
def output_autogen():
    """Emit the C comment marking the output file as autogenerated."""
    output('/* This file is autogenerated by scripts/decodetree.py. */\n\n')
89
90
def str_indent(c):
    """Return a string of C space characters (empty when C <= 0)."""
    return ' ' * c
94
95
def str_fields(fields):
    """Return a string uniquely identifying FIELDS

    The field names are sorted and joined with underscores.
    """
    return '_'.join(sorted(fields.keys()))
102
103
def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK

    One character is produced per instruction bit, most significant
    first: '0' or '1' where MASK selects the bit, '.' elsewhere.  A
    blank follows every bit that is set in the grouping constant.
    """
    # Bits after which a separating blank is inserted.
    group_ends = 0x01010100
    probe = 1 << (insnwidth - 1)
    chars = []
    while probe:
        if not probe & mask:
            chars.append('.')
        elif probe & bits:
            chars.append('1')
        else:
            chars.append('0')
        if probe & group_ends:
            chars.append(' ')
        probe >>= 1
    return ''.join(chars)
123
124
def is_pow2(x):
    """Return true iff X is equal to a power of 2.

    Zero and negative values are not powers of 2 and yield False.
    """
    # x & (x - 1) clears the lowest set bit.  It is also 0 for x == 0,
    # which therefore has to be rejected explicitly.
    return x > 0 and (x & (x - 1)) == 0
128
129
def ctz(x):
    """Return the number of times 2 factors into X."""
    assert x != 0
    # x & -x isolates the lowest set bit; its bit length is one more
    # than the number of zero bits below it.
    return (x & -x).bit_length() - 1
137
138
def is_contiguous(bits):
    """Return the shift of BITS if its set bits form one run, else -1.

    The result is the position of the lowest set bit when all set bits
    of BITS are adjacent; -1 is returned for zero or for a mask with gaps.
    """
    if bits == 0:
        return -1
    shift = ctz(bits)
    # A single run, moved down to bit 0, is one less than a power of 2.
    return shift if is_pow2((bits >> shift) + 1) else -1
147
148
def eq_fields_for_args(flds_a, flds_b):
    """Return True if FLDS_A and FLDS_B name the same fields.

    Only names are compared: the sizes must agree and every key of the
    dictionary FLDS_A must be contained in FLDS_B.
    """
    if len(flds_a) != len(flds_b):
        return False
    return all(name in flds_b for name in flds_a.keys())
156
157
def eq_fields_for_fmts(flds_a, flds_b):
    """Return True if both dictionaries map the same names to equal fields.

    Two fields are equal only when they are of the same class and
    also compare equal.
    """
    if len(flds_a) != len(flds_b):
        return False

    def differs(name, fld):
        # A name missing on the other side counts as a difference.
        if name not in flds_b:
            return True
        other = flds_b[name]
        return fld.__class__ != other.__class__ or fld != other

    return not any(differs(name, fld) for name, fld in flds_a.items())
168
169
class Field:
    """Class representing a simple instruction field"""
    def __init__(self, sign, pos, len):
        self.sign = sign
        self.pos = pos
        self.len = len
        # LEN one bits, moved up so that the lowest sits at bit POS.
        self.mask = ((1 << len) - 1) << pos

    def __str__(self):
        marker = 's' if self.sign else ''
        return ''.join([str(self.pos), ':', marker, str(self.len)])

    def str_extract(self):
        """Return the C expression extracting this field from insn."""
        extr = 'sextract32' if self.sign else 'extract32'
        return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)

    def __eq__(self, other):
        # Position and length are both captured by the mask.
        return self.sign == other.sign and self.mask == other.mask

    def __ne__(self, other):
        return not self.__eq__(other)
# end Field
198
199
class MultiField:
    """Class representing a compound instruction field"""
    def __init__(self, subs, mask):
        self.subs = subs
        # The first component ends up in the top bits of the assembled
        # value, so it decides the signedness of the whole field.
        self.sign = subs[0].sign
        self.mask = mask

    def __str__(self):
        return str(self.subs)

    def str_extract(self):
        """Return the C expression assembling all components.

        The last component supplies the low bits; each earlier one is
        deposited above the bits accumulated so far.
        """
        expr = '0'
        shift = 0
        for sub in reversed(self.subs):
            piece = sub.str_extract()
            if shift == 0:
                expr = piece
            else:
                expr = 'deposit32({0}, {1}, {2}, {3})' \
                       .format(expr, shift, 32 - shift, piece)
            shift += sub.len
        return expr

    def __eq__(self, other):
        if len(self.subs) != len(other.subs):
            return False
        # Components must pairwise agree in both class and value.
        return not any(a.__class__ != b.__class__ or a != b
                       for a, b in zip(self.subs, other.subs))

    def __ne__(self, other):
        return not self.__eq__(other)
# end MultiField
233
234
class ConstField:
    """Class representing an argument field with constant value"""
    def __init__(self, value):
        self.value = value
        # A constant consumes no instruction bits.
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        """Return the C expression for this field: the constant itself."""
        return str(self.value)

    # Python 3 never calls __cmp__, so equality has to be provided by
    # the rich comparison methods; without them two ConstFields holding
    # the same value compare unequal (identity comparison).
    def __eq__(self, other):
        if not isinstance(other, ConstField):
            return NotImplemented
        return self.value == other.value

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    # Defining __eq__ would otherwise make instances unhashable.
    def __hash__(self):
        return hash(self.value)

    def __cmp__(self, other):
        # Unused by Python 3 itself; kept for explicit callers.
        return self.value - other.value
# end ConstField
251
252
class FunctionField:
    """Class representing a field passed through a function"""
    def __init__(self, func, base):
        self.func = func
        self.base = base
        # Mask and signedness are those of the wrapped field.
        self.sign = base.sign
        self.mask = base.mask

    def __str__(self):
        return ''.join([self.func, '(', str(self.base), ')'])

    def str_extract(self):
        """Return the C call passing the extracted base field to FUNC."""
        return ''.join([self.func, '(ctx, ', self.base.str_extract(), ')'])

    def __eq__(self, other):
        return self.func == other.func and self.base == other.base

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField
273
274
class ParameterField:
    """Class representing a pseudo-field read from a function"""
    def __init__(self, func):
        self.func = func
        # No instruction bits are involved, hence the empty mask.
        self.mask = 0
        self.sign = 0

    def __str__(self):
        return self.func

    def str_extract(self):
        """Return the C call that produces the value from ctx alone."""
        return ''.join([self.func, '(ctx)'])

    def __eq__(self, other):
        return self.func == other.func

    def __ne__(self, other):
        return not self.__eq__(other)
# end ParameterField
294
295
class Arguments:
    """Class representing the extracted fields of a format"""
    def __init__(self, nm, flds, extern):
        self.name = nm
        self.extern = extern
        # Keep the member names as a sorted list.
        members = list(flds)
        members.sort()
        self.fields = members

    def __str__(self):
        return ' '.join([self.name, str(self.fields)])

    def struct_name(self):
        """Return the name of the C struct type for this argument set."""
        return 'arg_' + self.name

    def output_def(self):
        """Emit the C typedef, unless the set is declared extern."""
        if self.extern:
            return
        output('typedef struct {\n')
        for member in self.fields:
            output(' int ', member, ';\n')
        output('} ', self.struct_name(), ';\n\n')
# end Arguments
316
317
class General:
    """Common code between instruction formats and instruction patterns"""
    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
        self.name = name
        # Remember where the object was defined, for diagnostics.
        self.file = input_file
        self.lineno = lineno
        # parse_generic() passes an Arguments object here for a Format
        # and a Format for a Pattern.
        self.base = base
        # Values and mask of the bits given as 0 or 1.
        self.fixedbits = fixb
        self.fixedmask = fixm
        # Mask of the bits given as '-'.
        self.undefmask = udfm
        # Union of the masks of all fields.
        self.fieldmask = fldm
        # Dictionary mapping field names to field objects.
        self.fields = flds
        # Number of instruction bits described.
        self.width = w

    def __str__(self):
        return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)

    def str1(self, i):
        """Return the string form of the object indented by I spaces."""
        return str_indent(i) + self.__str__()
# end General
338
339
class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        """Return the name of the generated C extract function."""
        return ''.join([decode_function, '_extract_', self.name])

    def output_extract(self):
        """Emit the C function that fills the argument struct from insn."""
        func_name = self.extract_name()
        struct = self.base.struct_name()
        output('static void ', func_name, '(DisasContext *ctx, ',
               struct, ' *a, ', insntype, ' insn)\n{\n')
        # One assignment per field of the format.
        for member, fld in self.fields.items():
            output(' a->', member, ' = ', fld.str_extract(), ';\n')
        output('}\n\n')
# end Format
354
355
class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the argument typedef and the translator prototype."""
        struct = self.base.base.struct_name()
        output('typedef ', struct, ' arg_', self.name, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, arg_', self.name, ' *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C code that extracts arguments and calls the translator.

        I is the indentation; when EXTRACTED is true the format's
        fields have been extracted already and only the pattern's own
        fields are assigned.
        """
        pad = str_indent(i)
        argset = self.base.base.name
        output(pad, '/* ', self.file, ':', str(self.lineno), ' */\n')
        if not extracted:
            output(pad, self.base.extract_name(),
                   '(ctx, &u.f_', argset, ', insn);\n')
        for member, fld in self.fields.items():
            output(pad, 'u.f_', argset, '.', member, ' = ',
                   fld.str_extract(), ';\n')
        output(pad, 'if (', translate_prefix, '_', self.name,
               '(ctx, &u.f_', argset, ')) return true;\n')

    # Normal patterns do not have children, so the tree-walking
    # passes have nothing to do here.
    def build_tree(self):
        pass

    def prop_masks(self):
        pass

    def prop_format(self):
        pass

    def prop_width(self):
        pass

# end Pattern
391
392
class MultiPattern(General):
    """Class representing a set of instruction patterns"""

    def __init__(self, lineno):
        self.file = input_file
        self.lineno = lineno
        self.pats = []
        self.base = None
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0
        self.width = None

    def __str__(self):
        r = 'group'
        if self.fixedbits is not None:
            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
        return r

    def output_decl(self):
        """Emit the declarations of all children."""
        for p in self.pats:
            p.output_decl()

    def prop_masks(self):
        """Compute the masks shared by all children, recursively."""
        global insnmask

        fixedmask = insnmask
        undefmask = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for p in self.pats:
            p.prop_masks()
            fixedmask &= p.fixedmask
            undefmask &= p.undefmask

        # Drop bits from fixedmask until all children agree on the
        # fixedbits selected by it.
        repeat = True
        fixedbits = 0
        while repeat and fixedmask != 0:
            fixedbits = None
            for p in self.pats:
                thisbits = p.fixedbits & fixedmask
                if fixedbits is None:
                    fixedbits = thisbits
                elif fixedbits != thisbits:
                    fixedmask &= ~(fixedbits ^ thisbits)
                    break
            else:
                # No child disagreed: the mask is final.
                repeat = False

        self.fixedbits = fixedbits
        self.fixedmask = fixedmask
        self.undefmask = undefmask

    def build_tree(self):
        """Build the decode trees of all children."""
        for p in self.pats:
            p.build_tree()

    def prop_format(self):
        """Propagate formats within the decode trees of all children."""
        # This pass must recurse into prop_format(); recursing into
        # build_tree() would rebuild the children's trees and leave
        # formats unpropagated in nested groups.
        for p in self.pats:
            p.prop_format()

    def prop_width(self):
        """Compute the common width of all children; they must agree."""
        width = None
        for p in self.pats:
            p.prop_width()
            if width is None:
                width = p.width
            elif width != p.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = width

# end MultiPattern


class IncMultiPattern(MultiPattern):
    """Class representing an overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the children in order, each guarded by its own test.

        A child whose fixedmask equals OUTERMASK needs no further
        test and is emitted at the current indentation.
        """
        ind = str_indent(i)
        for p in self.pats:
            if outermask != p.fixedmask:
                # Test only the bits not already decided by the caller.
                innermask = p.fixedmask & ~outermask
                innerbits = p.fixedbits & ~outermask
                output(ind, 'if ((insn & ',
                       '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits),
                       ') {\n')
                output(ind, ' /* ',
                       str_match_bits(p.fixedbits, p.fixedmask), ' */\n')
                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
                output(ind, '}\n')
            else:
                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
# end IncMultiPattern


class Tree:
    """Class representing a node in a decode tree"""

    def __init__(self, fm, tm):
        # All bits decided at or above this node.
        self.fixedmask = fm
        # The bits this node switches on.
        self.thismask = tm
        # List of (bits, child) pairs.
        self.subs = []
        # Format common to all children, once propagated; else None.
        self.base = None

    def str1(self, i):
        """Return a multi-line dump of the subtree indented by I spaces."""
        ind = str_indent(i)
        r = '{0}{1:08x}'.format(ind, self.fixedmask)
        # The common format is stored in 'base'; there is no 'format'
        # attribute on a Tree.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += '{0} {1:08x}:\n'.format(ind, b)
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch over the bits of this node."""
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)

            def str_case(b, sh=sh):
                return '0x{0:x}'.format(b >> sh)
        else:
            def str_switch(b):
                return 'insn & 0x{0:08x}'.format(b)

            def str_case(b):
                return '0x{0:08x}'.format(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        for b, s in sorted(self.subs):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
            output(ind, ' return false;\n')
        output(ind, '}\n')
# end Tree


class ExcMultiPattern(MultiPattern):
    """Class representing a non-overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        # Defer everything to our decomposed Tree node
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return the Tree that distinguishes PATS by their fixed bits."""
        # Find the intersection of all remaining fixedmask.
        innermask = ~outermask & insnmask
        for i in pats:
            innermask &= i.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                t = Tree(outermask, innermask)
                t.subs.append((0, pats[0]))
                return t

            text = 'overlapping patterns:'
            for p in pats:
                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
            error_with_file(pats[0].file, pats[0].lineno, text)

        fullmask = outermask | innermask

        # Sort each element of pats into the bin selected by the mask.
        bins = {}
        for i in pats:
            fb = i.fixedbits & innermask
            if fb in bins:
                bins[fb].append(i)
            else:
                bins[fb] = [i]

        # We must recurse if any bin has more than one element or if
        # the single element in the bin has not been fully matched.
        t = Tree(fullmask, innermask)

        for b, l in bins.items():
            s = l[0]
            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
            t.subs.append((b, s))

        return t

    def build_tree(self):
        """Build the children's trees, then the tree of this group."""
        # Recurse with the matching pass, build_tree(), not prop_format().
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first search.
        for (b, s) in tree.subs:
            if isinstance(s, Tree):
                ExcMultiPattern.__prop_format(s)

        # If all entries in SUBS have the same format, then
        # propagate that into the tree.
        f = None
        for (b, s) in tree.subs:
            if f is None:
                f = s.base
                if f is None:
                    return
            if f is not s.base:
                return
        tree.base = f

    def prop_format(self):
        """Propagate formats in the children, then in this group's tree."""
        super().prop_format()
        self.__prop_format(self.tree)

# end ExcMultiPattern
636
637
def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO

    Each token is either '!function=NAME' or a 'POS:LEN' / 'POS:sLEN'
    bit range.  The resulting field object is stored in the global
    FIELDS dictionary under NAME.
    """
    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    parts = []
    total = 0
    func = None
    for tok in toks:
        if re.match('^!function=', tok):
            if func:
                error(lineno, 'duplicate function')
            func = tok.split('=')[1]
            continue

        if re.fullmatch('[0-9]+:s[0-9]+', tok):
            # Signed field extract
            pieces, signed = tok.split(':s'), True
        elif re.fullmatch('[0-9]+:[0-9]+', tok):
            # Unsigned field extract
            pieces, signed = tok.split(':'), False
        else:
            error(lineno, 'invalid field token "{0}"'.format(tok))
        start = int(pieces[0])
        length = int(pieces[1])
        if start + length > insnwidth:
            error(lineno, 'field {0} too large'.format(tok))
        parts.append(Field(signed, start, length))
        total += length

    if total > insnwidth:
        error(lineno, 'field too large')

    if not parts:
        # Without any bit range the value can only come from a function.
        if func:
            fld = ParameterField(func)
        else:
            error(lineno, 'field with no value')
    else:
        if len(parts) == 1:
            fld = parts[0]
        else:
            combined = 0
            for part in parts:
                if combined & part.mask:
                    error(lineno, 'field components overlap')
                combined |= part.mask
            fld = MultiField(parts, combined)
        if func:
            fld = FunctionField(func, fld)

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = fld
# end parse_field
698
699
def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO

    Tokens are member names, plus the optional marker '!extern'.  The
    resulting Arguments object is stored in the global ARGUMENTS
    dictionary under NAME.
    """
    global anyextern

    members = []
    is_extern = False
    for tok in toks:
        if re.fullmatch('!extern', tok):
            is_extern = True
            anyextern = True
            continue
        if not re.fullmatch(re_C_ident, tok):
            error(lineno, 'invalid argument set token "{0}"'.format(tok))
        if tok in members:
            error(lineno, 'duplicate argument "{0}"'.format(tok))
        members.append(tok)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, members, is_extern)
# end parse_arguments
723
724
def lookup_field(lineno, name):
    """Return the field named NAME, or report an error at LINENO."""
    global fields
    if name in fields:
        return fields[name]
    error(lineno, 'undefined field', name)
730
731
def add_field(lineno, flds, new_name, f):
    """Store F in FLDS as NEW_NAME and return FLDS.

    A name that is already present is reported as an error at LINENO.
    """
    if new_name in flds:
        error(lineno, 'duplicate field', new_name)
    flds[new_name] = f
    return flds
737
738
def add_field_byname(lineno, flds, new_name, old_name):
    """Store the global field OLD_NAME in FLDS as NEW_NAME; return FLDS."""
    return add_field(lineno, flds, new_name, lookup_field(lineno, old_name))
741
742
def infer_argument_set(flds):
    """Return an argument set whose members are the names in FLDS.

    An existing set is reused when one matches; otherwise a new,
    non-extern set with a generated name is created and registered.
    """
    for candidate in arguments.values():
        if eq_fields_for_args(flds, candidate.fields):
            return candidate

    # Generated names are numbered by the count of sets known so far.
    new_name = decode_function + str(len(arguments))
    created = Arguments(new_name, flds.keys(), False)
    arguments[new_name] = created
    return created
755
756
def infer_format(arg, fieldmask, flds, width):
    """Find or create a Format for a pattern that names none.

    ARG is the explicit argument set, or None; FIELDMASK, FLDS and WIDTH
    describe the pattern being parsed.  Returns a (format, fields) pair:
    the format to use and the fields that stay on the pattern itself.
    """
    global arguments
    global formats
    global decode_function

    # Separate constant fields, returned for the pattern, from the
    # fields given to a newly created format.
    # NOTE(review): 'c is ConstField' compares a field object with the
    # class itself, so it looks like this is never true and const_flds
    # always stays empty -- confirm whether isinstance() was intended.
    const_flds = {}
    var_flds = {}
    for n, c in flds.items():
        if c is ConstField:
            const_flds[n] = c
        else:
            var_flds[n] = c

    # Look for an existing format with the same argument set and fields
    for fmt in formats.values():
        if arg and fmt.base != arg:
            continue
        if fieldmask != fmt.fieldmask:
            continue
        if width != fmt.width:
            continue
        if not eq_fields_for_fmts(flds, fmt.fields):
            continue
        return (fmt, const_flds)

    # Nothing suitable exists: register a new format under a generated
    # name, inferring the argument set as well if none was given.
    name = decode_function + '_Fmt_' + str(len(formats))
    if not arg:
        arg = infer_argument_set(flds)

    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format
791
792
def parse_generic(lineno, parent_pat, name, toks):
    """Parse one instruction format or pattern from TOKS at LINENO

    A format is parsed when PARENT_PAT is None and is stored in the
    global FORMATS dictionary under NAME.  Otherwise a pattern is parsed
    and appended both to PARENT_PAT and to the global ALLPATTERNS list.
    """
    global fields
    global arguments
    global formats
    global allpatterns
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_C_ident
    global insnwidth
    global insnmask
    global variablewidth

    is_format = parent_pat is None

    fixedmask = 0
    fixedbits = 0
    undefmask = 0
    width = 0
    flds = {}
    arg = None
    fmt = None
    for t in toks:
        # '&Foo' gives a format an explicit argument set.
        if re.fullmatch(re_arg_ident, t):
            tt = t[1:]
            if arg:
                error(lineno, 'multiple argument sets')
            if tt in arguments:
                arg = arguments[tt]
            else:
                error(lineno, 'undefined argument set', t)
            continue

        # '@Foo' gives a pattern an explicit format.
        if re.fullmatch(re_fmt_ident, t):
            tt = t[1:]
            if fmt:
                error(lineno, 'multiple formats')
            if tt in formats:
                fmt = formats[tt]
            else:
                error(lineno, 'undefined format', t)
            continue

        # '%Foo' imports a field.
        if re.fullmatch(re_fld_ident, t):
            tt = t[1:]
            flds = add_field_byname(lineno, flds, tt, tt)
            continue

        # 'Foo=%Bar' imports a field with a different name.
        if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
            (fname, iname) = t.split('=%')
            flds = add_field_byname(lineno, flds, fname, iname)
            continue

        # 'Foo=number' sets an argument field to a constant value
        if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
            (fname, value) = t.split('=')
            value = int(value)
            flds = add_field(lineno, flds, fname, ConstField(value))
            continue

        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
        # required ones, or dont-cares.
        if re.fullmatch('[01.-]+', t):
            shift = len(t)
            # Rewrite the token into three binary numbers: the mask of
            # fixed bits, their values, and the mask of '-' bits.
            fms = t.replace('0', '1')
            fms = fms.replace('.', '0')
            fms = fms.replace('-', '0')
            fbs = t.replace('.', '0')
            fbs = fbs.replace('-', '0')
            ubm = t.replace('1', '0')
            ubm = ubm.replace('.', '0')
            ubm = ubm.replace('-', '1')
            fms = int(fms, 2)
            fbs = int(fbs, 2)
            ubm = int(ubm, 2)
            fixedbits = (fixedbits << shift) | fbs
            fixedmask = (fixedmask << shift) | fms
            undefmask = (undefmask << shift) | ubm
        # Otherwise, fieldname:fieldwidth
        elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
            (fname, flen) = t.split(':')
            sign = False
            if flen[0] == 's':
                sign = True
                flen = flen[1:]
            shift = int(flen, 10)
            if shift + width > insnwidth:
                error(lineno, 'field {0} exceeds insnwidth'.format(fname))
            # Tokens are consumed from the most significant bit down.
            f = Field(sign, insnwidth - width - shift, shift)
            flds = add_field(lineno, flds, fname, f)
            fixedbits <<= shift
            fixedmask <<= shift
            undefmask <<= shift
        else:
            error(lineno, 'invalid token "{0}"'.format(t))
        width += shift

    # A short definition of whole bytes is moved to the top of the
    # instruction word; the unused low bits are marked undefined.
    if variablewidth and width < insnwidth and width % 8 == 0:
        shift = insnwidth - width
        fixedbits <<= shift
        fixedmask <<= shift
        undefmask <<= shift
        undefmask |= (1 << shift) - 1

    # We should have filled in all of the bits of the instruction.
    elif not (is_format and width == 0) and width != insnwidth:
        error(lineno, 'definition has {0} bits'.format(width))

    # Do not check for fields overlapping fields; one valid usage
    # is to be able to duplicate fields via import.
    fieldmask = 0
    for f in flds.values():
        fieldmask |= f.mask

    # Fix up what we've parsed to match either a format or a pattern.
    if is_format:
        # Formats cannot reference formats.
        if fmt:
            error(lineno, 'format referencing format')
        # If an argument set is given, then there should be no fields
        # without a place to store it.
        if arg:
            for f in flds.keys():
                if f not in arg.fields:
                    error(lineno, 'field {0} not in argument set {1}'
                                  .format(f, arg.name))
        else:
            arg = infer_argument_set(flds)
        if name in formats:
            error(lineno, 'duplicate format name', name)
        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
                     undefmask, fieldmask, flds, width)
        formats[name] = fmt
    else:
        # Patterns can reference a format ...
        if fmt:
            # ... but not an argument simultaneously
            if arg:
                error(lineno, 'pattern specifies both format and argument set')
            if fixedmask & fmt.fixedmask:
                error(lineno, 'pattern fixed bits overlap format fixed bits')
            if width != fmt.width:
                error(lineno, 'pattern uses format of different width')
            fieldmask |= fmt.fieldmask
            fixedbits |= fmt.fixedbits
            fixedmask |= fmt.fixedmask
            undefmask |= fmt.undefmask
        else:
            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
        arg = fmt.base
        # Every argument must be set exactly once, by either the
        # format or the pattern.
        for f in flds.keys():
            if f not in arg.fields:
                error(lineno, 'field {0} not in argument set {1}'
                              .format(f, arg.name))
            if f in fmt.fields.keys():
                error(lineno, 'field {0} set by format and pattern'.format(f))
        for f in arg.fields:
            if f not in flds.keys() and f not in fmt.fields.keys():
                error(lineno, 'field {0} not initialized'.format(f))
        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                      undefmask, fieldmask, flds, width)
        parent_pat.pats.append(pat)
        allpatterns.append(pat)

    # Validate the masks that we have assembled.
    if fieldmask & fixedmask:
        error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, fixedmask))
    if fieldmask & undefmask:
        error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fieldmask, undefmask))
    if fixedmask & undefmask:
        error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
                      .format(fixedmask, undefmask))
    if not is_format:
        allbits = fieldmask | fixedmask | undefmask
        if allbits != insnmask:
            error(lineno, 'bits left unspecified (0x{0:08x})'
                          .format(allbits ^ insnmask))
# end parse_generic
978
979
def parse_file(f, parent_pat):
    """Parse all of the patterns within a file

    F is an iterable of text lines.  Definitions found at the top
    level are added to PARENT_PAT; '{ }' and '[ ]' open nested
    overlapping and non-overlapping groups respectively.  Each nesting
    level must be indented by two further columns.
    """
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_pat_ident

    # Read all of the lines of the file.  Concatenate lines
    # ending in backslash; discard empty lines and comments.
    toks = []
    lineno = 0
    nesting = 0
    nesting_pats = []

    for line in f:
        lineno += 1

        # Expand and strip spaces, to find indent.
        line = line.rstrip()
        line = line.expandtabs()
        len1 = len(line)
        line = line.lstrip()
        len2 = len(line)

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if len(toks) != 0:
            # Next line after continuation
            toks.extend(t)
        else:
            # Allow completely blank lines.
            if len1 == 0:
                continue
            indent = len1 - len2
            # Empty line due to comment.
            if len(t) == 0:
                # Indentation must be correct, even for comment lines.
                if indent != nesting:
                    error(lineno, 'indentation ', indent, ' != ', nesting)
                continue
            start_lineno = lineno
            toks = t

        # Continuation?
        if toks[-1] == '\\':
            toks.pop()
            continue

        name = toks[0]
        del toks[0]

        # End nesting?
        if name == '}' or name == ']':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after close brace')

            # Make sure { } and [ ] nest properly.
            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
                error(lineno, 'mismatched close brace')

            # Only an empty stack means an unmatched close; anything
            # else raised here must not be reported as such.
            try:
                parent_pat = nesting_pats.pop()
            except IndexError:
                error(lineno, 'extra close brace')

            nesting -= 2
            if indent != nesting:
                error(lineno, 'indentation ', indent, ' != ', nesting)

            toks = []
            continue

        # Everything else should have current indentation.
        if indent != nesting:
            error(start_lineno, 'indentation ', indent, ' != ', nesting)

        # Start nesting?
        if name == '{' or name == '[':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after open brace')

            if name == '{':
                nested_pat = IncMultiPattern(start_lineno)
            else:
                nested_pat = ExcMultiPattern(start_lineno)
            parent_pat.pats.append(nested_pat)
            nesting_pats.append(parent_pat)
            parent_pat = nested_pat

            nesting += 2
            toks = []
            continue

        # Determine the type of object needing to be parsed.
        if re.fullmatch(re_fld_ident, name):
            parse_field(start_lineno, name[1:], toks)
        elif re.fullmatch(re_arg_ident, name):
            parse_arguments(start_lineno, name[1:], toks)
        elif re.fullmatch(re_fmt_ident, name):
            parse_generic(start_lineno, None, name[1:], toks)
        elif re.fullmatch(re_pat_ident, name):
            parse_generic(start_lineno, parent_pat, name, toks)
        else:
            error(lineno, 'invalid token "{0}"'.format(name))
        toks = []

    if nesting != 0:
        error(lineno, 'missing close brace')
# end parse_file
1093
1094
class SizeTree:
    """Class representing a node in a size decode tree"""

    def __init__(self, m, w):
        self.mask = m
        self.subs = []
        self.base = None
        self.width = w

    def str1(self, i):
        """Return a multi-line dump of the subtree indented by I spaces."""
        pad = str_indent(i)
        text = '{0}{1:08x}'.format(pad, self.mask)
        text += ' [\n'
        for bits, sub in self.subs:
            text += '{0} {1:08x}:\n'.format(pad, bits)
            text += sub.str1(i + 4) + '\n'
        text += pad + ']'
        return text

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch over the bits of this node.

        EXTRACTED is the number of instruction bits loaded so far.
        """
        pad = str_indent(i)

        # If we need to load more bytes to test, do so now.
        if extracted < self.width:
            load_args = '_load_bytes(ctx, insn, {0}, {1});\n' \
                        .format(extracted // 8, self.width // 8)
            output(pad, 'insn = ', decode_function, load_args)
            extracted = self.width

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.mask)
        if sh > 0:
            switch_expr = '(insn >> {0}) & 0x{1:x}'.format(sh, self.mask >> sh)

            def case_label(bits):
                return '0x{0:x}'.format(bits >> sh)
        else:
            switch_expr = 'insn & 0x{0:08x}'.format(self.mask)

            def case_label(bits):
                return '0x{0:08x}'.format(bits)

        output(pad, 'switch (', switch_expr, ') {\n')
        for bits, sub in sorted(self.subs):
            innermask = outermask | self.mask
            innerbits = outerbits | bits
            output(pad, 'case ', case_label(bits), ':\n')
            output(pad, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            sub.output_code(i + 4, extracted, innerbits, innermask)
        output(pad, '}\n')
        output(pad, 'return insn;\n')
# end SizeTree
1155
class SizeLeaf:
    """Class representing a leaf node in a size decode tree"""

    def __init__(self, m, w):
        self.mask = m
        self.width = w

    def str1(self, i):
        """Return the mask in hex, indented by I spaces."""
        return '{0}{1:08x}'.format(str_indent(i), self.mask)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit C code loading any missing bytes and returning insn.

        EXTRACTED is the number of instruction bits loaded so far.
        """
        pad = str_indent(i)

        # If we need to load more bytes, do so now.
        if extracted < self.width:
            load_args = '_load_bytes(ctx, insn, {0}, {1});\n' \
                        .format(extracted // 8, self.width // 8)
            output(pad, 'insn = ', decode_function, load_args)
            extracted = self.width
        output(pad, 'return insn;\n')
# end SizeLeaf
1182
1183
def build_size_tree(pats, width, outerbits, outermask):
    """Return the size decode tree for PATS.

    WIDTH is the number of instruction bits examined so far;
    OUTERBITS/OUTERMASK are the bits already decided by the callers.
    The result is a SizeLeaf when all patterns share one width,
    otherwise a SizeTree (or the result for the next byte).
    """
    # Collect the mask of bits that are fixed in this width
    innermask = (0xff << (insnwidth - width)) & ~outermask
    minwidth = None
    onewidth = True
    for pat in pats:
        innermask &= pat.fixedmask
        if minwidth is None:
            minwidth = pat.width
        elif minwidth != pat.width:
            onewidth = False
            if minwidth < pat.width:
                minwidth = pat.width

    if onewidth:
        return SizeLeaf(innermask, minwidth)

    if innermask == 0:
        # Nothing distinguishes the patterns in this byte; try the next.
        if width < minwidth:
            return build_size_tree(pats, width + 8, outerbits, outermask)

        pnames = [p.name + ':' + p.file + ':' + str(p.lineno) for p in pats]
        error_with_file(pats[0].file, pats[0].lineno,
                        'overlapping patterns size {0}:'.format(width), pnames)

    # Group the patterns by the value of the bits selected by the mask.
    bins = {}
    for pat in pats:
        bins.setdefault(pat.fixedbits & innermask, []).append(pat)

    fullmask = outermask | innermask
    if len(bins) == 1:
        # All patterns agree here: no switch needed, move to the next byte.
        (b, members), = bins.items()
        return build_size_tree(members, width + 8, b | outerbits, fullmask)

    node = SizeTree(innermask, width)
    for b, members in bins.items():
        child = build_size_tree(members, width, b | outerbits, fullmask)
        node.subs.append((b, child))
    return node
# end build_size_tree
1234
1235
def prop_size(tree):
    """Propagate minimum widths up the decode size tree.

    For a SizeTree node, recurse into every sub-tree, store the smallest
    width found in tree.width and return it.  Any other node simply
    reports its own width.
    """
    if not isinstance(tree, SizeTree):
        return tree.width

    narrowest = None
    for _, sub in tree.subs:
        sub_width = prop_size(sub)
        if narrowest is None or narrowest > sub_width:
            narrowest = sub_width

    # A node must not end up narrower than the width it was built with.
    assert narrowest >= tree.width
    tree.width = narrowest
    return narrowest
# end prop_size
1251
1252
def main():
    """Command-line entry point: parse options, read the inputs, emit C.

    Options accepted (see the getopt call below):
      -o, --output FILE     write the generated code to FILE, not stdout
      --decode NAME         name the decode function NAME, non-static
      --static-decode NAME  name the decode function NAME, still static
      --translate PREFIX    set the translate prefix and drop its 'static '
      -w, --insnwidth N     instruction width; only 16 and 32 are accepted
      --varinsnwidth N      as --insnwidth, and enable variable-width mode
    All remaining arguments are input files, parsed in order into a
    single top-level pattern group.

    Command-line problems are reported through error().
    """
    global arguments
    global formats
    global allpatterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask
    global decode_function
    global variablewidth
    global anyextern

    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
                 'static-decode=', 'varinsnwidth=']
    # NOTE(review): the short-option string accepts '-v', but no branch
    # below handles it, so '-v' reaches the 'unhandled option' assertion.
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            decode_function = a
            decode_scope = ''
        elif o == '--static-decode':
            decode_function = a
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
            if o == '--varinsnwidth':
                variablewidth = True
            # Report a non-numeric width like any other bad width instead
            # of letting int() raise a bare ValueError traceback.
            try:
                insnwidth = int(a)
            except ValueError:
                error(0, 'cannot handle insns of width', a)
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        else:
            assert False, 'unhandled option'

    if not args:
        error(0, 'missing input file')

    toppat = ExcMultiPattern(0)

    for filename in args:
        input_file = filename
        # Decode as UTF-8 regardless of the build machine's locale, and
        # let the context manager close the file even if parsing raises.
        with open(filename, 'r', encoding='utf-8') as f:
            parse_file(f, toppat)

    # We do not want to compute masks for toppat, because those masks
    # are used as a starting point for build_tree.  For toppat, we must
    # insist that decode begins from naught.
    for pat in toppat.pats:
        pat.prop_masks()

    toppat.build_tree()
    toppat.prop_format()

    if variablewidth:
        for pat in toppat.pats:
            pat.prop_width()
        stree = build_size_tree(toppat.pats, 8, 0, 0)
        prop_size(stree)

    # output_fd stays a module global, not a 'with' target: it is declared
    # global above so that code outside main() can reach it.
    if output_file:
        output_fd = open(output_file, 'w', encoding='utf-8')
    else:
        output_fd = sys.stdout

    output_autogen()
    for n in sorted(arguments):
        arguments[n].output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    #
    # If we're sharing formats, we're likely also sharing trans_* functions,
    # but we can't tell which ones.  Prevent issues from the compiler by
    # suppressing redundant declaration warnings.
    if anyextern:
        output("#pragma GCC diagnostic push\n",
               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
               "#ifdef __clang__\n"
               "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
               "#endif\n\n")

    out_pats = {}
    for pat in allpatterns:
        if pat.name in out_pats:
            prev = out_pats[pat.name]
            if pat.base.base != prev.base.base:
                error(0, pat.name, ' has conflicting argument sets')
        else:
            pat.output_decl()
            out_pats[pat.name] = pat
    output('\n')

    if anyextern:
        output("#pragma GCC diagnostic pop\n\n")

    for n in sorted(formats):
        formats[n].output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    i4 = str_indent(4)

    if allpatterns:
        output(i4, 'union {\n')
        for n in sorted(arguments):
            f = arguments[n]
            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
        output(i4, '} u;\n\n')
        toppat.output_code(4, False, 0, 0)

    output(i4, 'return false;\n')
    output('}\n')

    if variablewidth:
        # The declaration uses the same indent string as the statements
        # that stree.output_code(4, ...) emits right after it.
        output('\n', decode_scope, insntype, ' ', decode_function,
               '_load(DisasContext *ctx)\n{\n',
               i4, insntype, ' insn = 0;\n\n')
        stree.output_code(4, 0, 0, 0)
        output('}\n')

    if output_file:
        output_fd.close()
# end main
1394
1395
# Run the generator only when this file is executed as a script, so that
# importing the module does not trigger main().
if __name__ == '__main__':
    main()