def _objdump_lexer_tokens(asm_lexer):
    """
    Build the ``tokens`` table shared by the objdump-style lexers.

    ``asm_lexer`` is the lexer that the instruction text of a disassembly
    line is delegated to through ``using()``.  The returned dict has a single
    ``'root'`` state; a newline-terminated line that none of the specific
    rules recognises is emitted as ``Other``.
    """
    # Character class used for addresses and raw bytes.
    # NOTE(review): this accepts every ASCII letter, not only a-f -- confirm
    # that the looseness is intended.
    hexch = r'[0-9A-Za-z]'

    # Pattern prefixes that several rules have in common.
    func_head = '(' + hexch + '+)( )(<)(.*?)'          # "<addr> <symbol"
    addr = '( *)(' + hexch + r'+:)'                    # "  <addr>:"
    raw_bytes = r'(\t)((?:' + hexch + hexch + ' )+)'   # "\t" + "xx xx xx "
    reloc_head = r'(\t\t\t)(' + hexch + r'+:)( )([^\t]+)(\t)(.*?)'

    rules = [
        # "<file>:     file format <format>"
        ('(.*?)(:)( +file format )(.*?)$',
         bygroups(Name.Label, Punctuation, Text, String)),

        # "Disassembly of section <name>:"
        ('(Disassembly of section )(.*?)(:)$',
         bygroups(Text, Name.Label, Punctuation)),

        # Function label carrying a +/- offset
        (func_head + '([-+])(0[xX][A-Za-z0-9]+)(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation, Number.Hex, Punctuation)),

        # Function label with no offset
        (func_head + '(>:)$',
         bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
                  Punctuation)),

        # Address, raw bytes, then the disassembled instruction.
        # (The tail literal is deliberately not a raw string, exactly as in
        # the pattern it reproduces, so it holds an actual tab character.)
        (addr + raw_bytes + '( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace,
                  using(asm_lexer))),

        # Address and instruction only (objdump --no-show-raw-insn)
        (addr + r'( *\t)([a-zA-Z].*?)$',
         bygroups(Whitespace, Name.Label, Whitespace,
                  using(asm_lexer))),

        # Address, raw bytes, then arbitrary trailing text (ascii)
        (addr + raw_bytes + '( *)(.*?)$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace,
                  String)),

        # Continuation line: raw opcode bytes with nothing after them.
        # NOTE(review): the rule just above can match these lines too (its
        # last two groups may be empty), so this one looks unreachable --
        # confirm before relying on it.
        (addr + raw_bytes + '$',
         bygroups(Whitespace, Name.Label, Whitespace, Number.Hex)),

        # "\t..." marks skipped bytes
        (r'\t\.\.\.$', Text),

        # Relocation entry with a +/- offset
        (reloc_head + '([-+])(0x' + hexch + '+)$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property,
                  Whitespace, Name.Constant, Punctuation, Number.Hex)),

        # Relocation entry with no offset
        (reloc_head + '$',
         bygroups(Whitespace, Name.Label, Whitespace, Name.Property,
                  Whitespace, Name.Constant)),

        # Anything else: pass the whole line through untouched
        (r'[^\n]+\n', Other),
    ]

    return {'root': rules}
class ObjdumpLexer(RegexLexer):
    """
    Lexer for the output of ``objdump -dr``.

    The token table comes from ``_objdump_lexer_tokens`` with ``GasLexer``
    as the lexer for the instruction text.
    """

    name = 'objdump'
    url = 'https://www.gnu.org/software/binutils'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']
    version_added = ''

    tokens = _objdump_lexer_tokens(GasLexer)
class DObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled D files.

    The input is lexed with ``ObjdumpLexer`` first; whatever that lexer emits
    as ``Other`` (lines it does not recognise as objdump output, i.e. the
    interleaved source) is then re-lexed with ``DLexer``.
    """

    name = 'd-objdump'
    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    def __init__(self, **options):
        """
        Bind the root (D) and language (objdump) lexers.

        ``DelegatingLexer.__init__`` requires both lexers as positional
        arguments; without this override the class cannot be constructed
        through the usual ``LexerClass(**options)`` call.
        """
        # Imported here so the block does not depend on a module-level
        # import that is not visible in this part of the file.
        from pygments.lexers.d import DLexer

        super().__init__(DLexer, ObjdumpLexer, **options)
class CObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C files.

    The input is lexed with ``ObjdumpLexer`` first; whatever that lexer emits
    as ``Other`` (lines it does not recognise as objdump output, i.e. the
    interleaved source) is then re-lexed with ``CLexer``.
    """

    name = 'c-objdump'
    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']
    url = 'https://www.gnu.org/software/binutils'
    version_added = ''

    def __init__(self, **options):
        """
        Bind the root (C) and language (objdump) lexers.

        ``DelegatingLexer.__init__`` requires both lexers as positional
        arguments; without this override the class cannot be constructed
        through the usual ``LexerClass(**options)`` call.
        """
        # Imported here so the block does not depend on a module-level
        # import that is not visible in this part of the file.
        from pygments.lexers.c_cpp import CLexer

        super().__init__(CLexer, ObjdumpLexer, **options)
class LlvmMirBodyLexer(RegexLexer):
    """
    Lexer for a bare LLVM MIR function body, i.e. MIR examples that are not
    wrapped in the surrounding YAML document.
    """

    name = 'LLVM-MIR Body'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir-body']
    filenames = []
    mimetypes = []
    version_added = '2.6'

    tokens = {
        'root': [
            # Basic-block attributes
            (words(('liveins', 'successors'), suffix=':'), Keyword),

            # Basic-block labels (definitions, then references)
            (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:',
             Name.Label),
            (r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:',
             Name.Label),
            (r'%bb\.[0-9]+(\.\w+)?', Name.Label),

            # Stack slot references
            (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),

            # Sub-register indices
            (r'%subreg\.\w+', Name),

            # Virtual registers; the 'vreg' state handles an optional
            # bank/class and type that may follow
            (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),

            # References to LLVM-IR globals
            include('global'),

            # References to intrinsics
            (r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global),

            # Integer and floating-point comparison predicates
            (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge',
                    'ult', 'ule'),
                   prefix=r'intpred\(', suffix=r'\)'),
             Name.Builtin),
            (words(('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt', 'uge',
                    'ult', 'ule'),
                   prefix=r'floatpred\(', suffix=r'\)'),
             Name.Builtin),

            # Physical registers
            (r'\$\w+', String.Single),

            # Assignment
            (r'=', Operator),

            # Generic MIR (gMIR) opcodes
            (r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|'
             r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|'
             r'G_CONSTANT|G_FCONSTANT|G_VASTART|G_VAARG|G_CTLZ|G_CTLZ_ZERO_UNDEF|'
             r'G_CTTZ|G_CTTZ_ZERO_UNDEF|G_CTPOP|G_BSWAP|G_BITREVERSE|'
             r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'
             r'G_ADD|G_SUB|G_MUL|G_[SU]DIV|G_[SU]REM|G_AND|G_OR|G_XOR|G_SHL|'
             r'G_[LA]SHR|G_[IF]CMP|G_SELECT|G_GEP|G_PTR_MASK|G_SMIN|G_SMAX|'
             r'G_UMIN|G_UMAX|G_[US]ADDO|G_[US]ADDE|G_[US]SUBO|G_[US]SUBE|'
             r'G_[US]MULO|G_[US]MULH|G_FNEG|G_FPEXT|G_FPTRUNC|G_FPTO[US]I|'
             r'G_[US]ITOFP|G_FABS|G_FCOPYSIGN|G_FCANONICALIZE|G_FMINNUM|'
             r'G_FMAXNUM|G_FMINNUM_IEEE|G_FMAXNUM_IEEE|G_FMINIMUM|G_FMAXIMUM|'
             r'G_FADD|G_FSUB|G_FMUL|G_FMA|G_FMAD|G_FDIV|G_FREM|G_FPOW|G_FEXP|'
             r'G_FEXP2|G_FLOG|G_FLOG2|G_FLOG10|G_FCEIL|G_FCOS|G_FSIN|G_FSQRT|'
             r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'
             r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'
             r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'
             r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'
             r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'
             r'FSUB)'
             r'|G_FENCE|G_EXTRACT|G_UNMERGE_VALUES|G_INSERT|G_MERGE_VALUES|'
             r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'
             r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'
             r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'
             r'G_SHUFFLE_VECTOR)\b',
             Name.Builtin),

            # Target-independent opcodes
            (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',
             Name.Builtin),

            # Operand flags
            (words(('killed', 'implicit')), Keyword),

            # ConstantInt: integer type followed by its value
            (r'(i[0-9]+)( +)', bygroups(Keyword.Type, Whitespace),
             'constantint'),

            # ConstantFloat: float type followed by its value
            (r'(half|float|double) +', Keyword.Type, 'constantfloat'),

            # Bare immediates
            include('integer'),

            # Machine memory operands
            (r'(::)( *)', bygroups(Operator, Whitespace), 'mmo'),

            # MIR comments
            (r';.*', Comment),

            # Any remaining word is assumed to be a target instruction
            (r'[a-zA-Z0-9_]+', Name),

            # Punctuation and whitespace that gets no special highlighting
            (r'[(), \n]+', Text),
        ],

        # Value part of a ConstantInt
        'constantint': [
            include('integer'),
            (r'(?=.)', Text, '#pop'),
        ],

        # Value part of a ConstantFloat
        'constantfloat': [
            include('float'),
            (r'(?=.)', Text, '#pop'),
        ],

        # What may follow a virtual register name
        'vreg': [
            # ':' introduces a register bank or class ('::' is excluded)
            (r'( *)(:(?!:))', bygroups(Whitespace, Keyword),
             ('#pop', 'vreg_bank_or_class')),
            # '(' introduces the low-level type
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],

        'vreg_bank_or_class': [
            # '_' stands for an unassigned bank/class
            (r'( *)(_)', bygroups(Whitespace, Name.Variable.Magic)),
            (r'( *)([a-zA-Z0-9_]+)', bygroups(Whitespace, Name.Variable)),
            # '(' introduces the low-level type
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],

        'vreg_type': [
            # Scalar / pointer types, then vector types
            (r'( *)([sp][0-9]+)', bygroups(Whitespace, Keyword.Type)),
            (r'( *)(<[0-9]+ *x *[sp][0-9]+>)',
             bygroups(Whitespace, Keyword.Type)),
            (r'\)', Text, '#pop'),
            (r'(?=.)', Text, '#pop'),
        ],

        'mmo': [
            (r'\(', Text),
            (r' +', Whitespace),
            (words(('load', 'store', 'on', 'into', 'from', 'align',
                    'monotonic', 'acquire', 'release', 'acq_rel',
                    'seq_cst')),
             Keyword),
            # References into the LLVM-IR
            (r'%ir\.[a-zA-Z0-9_.-]+', Name),
            (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),
            (r'[-+]', Operator),
            include('integer'),
            include('global'),
            (r',', Punctuation),
            # '), (' separates consecutive operands; a lone ')' ends them
            (r'\), \(', Text),
            (r'\)', Text, '#pop'),
        ],

        # Building blocks pulled in with include()
        'integer': [
            (r'-?[0-9]+', Number.Integer),
        ],
        'float': [
            (r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float),
        ],
        'global': [
            (r'\@[a-zA-Z0-9_.]+', Name.Variable.Global),
        ],
    }
class LlvmMirLexer(RegexLexer):
    """
    Lexer for complete LLVM MIR files.

    MIR is the human-readable serialization of LLVM's machine-specific
    intermediate representation.  LLVM developers use it to inspect the state
    of compilation at various points and to test individual pieces of the
    compiler.

    Embedded LLVM-IR documents are handed to ``LlvmLexer`` and ``body:``
    blocks to ``LlvmMirBodyLexer``.
    """

    name = 'LLVM-MIR'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir']
    filenames = ['*.mir']
    version_added = '2.6'

    tokens = {
        'root': [
            # '#' starts a comment at the YAML level
            (r'#.*', Comment),
            # A document opening with '--- |' holds LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Any other document is MIR
            (r'---', Keyword, 'llvm_mir'),
            # Swallow everything else in as few tokens as possible
            (r'[^-#]+|.', Text),
        ],

        'llvm_ir': [
            # A document ends at '...' or at the next '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Everything up to that point goes to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],

        'llvm_mir': [
            # '#' starts a comment at the YAML level
            (r'#.*', Comment),
            # A document ends at '...' or at the next '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),

            # Simple attributes whose value is highlighted
            (r'name:', Keyword, 'name'),
            (words(('alignment', ), suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'),
             Keyword, 'boolean'),

            # Attributes whose contents are left unhighlighted
            (words(('registers', 'stack', 'fixedStack', 'liveins',
                    'frameInfo', 'machineFunctionInfo'),
                   suffix=':'),
             Keyword),

            # The body block is delegated to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),

            # Everything else
            (r'.+', Text),
            (r'\n', Whitespace),
        ],

        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],

        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],

        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],

        'llvm_mir_body': [
            # A document ends at '...' or at the next '---'; leaving it
            # means popping both llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Body text up to the terminator goes to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)',
             bygroups(using(LlvmMirBodyLexer))),
            # The '...' terminator is optional: if it was not found above it
            # is not there (a '---' might follow instead), so the remaining
            # text is all body
            (r'(?!\.\.\.|---)((?:.|\n)+)',
             bygroups(using(LlvmMirBodyLexer))),
        ],
    }
class NasmLexer(RegexLexer):
    """
    For Nasm (Intel) assembly code.

    Only the lexer metadata and the regex building blocks are defined in
    this part of the class.
    """

    name = 'NASM'
    aliases = ['nasm']
    filenames = ['*.asm', '*.ASM', '*.nasm']
    mimetypes = ['text/x-nasm']
    url = 'https://nasm.us'
    version_added = ''

    # Tasm uses the same file endings, but TASM is not as common as NASM, so
    # we prioritize NASM higher by default
    priority = 1.0

    # NOTE(review): the patterns below spell letters in lower case only while
    # `directives` is upper case; presumably the lexer is compiled
    # case-insensitively -- confirm where the regex flags are set.
    identifier = r'[a-z$._?][\w$.?#@~]*'
    # Hex literals: 0x..., $0... and ...h.  The '$' is escaped so that it is
    # a literal dollar sign; unescaped it is an end-of-input/line anchor,
    # which made the '$0...' alternative impossible to match.
    hexn = r'(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    # NOTE(review): this accepts "1.5" and "1.e5" but not "1.5e5" -- confirm
    # that this is the intended float syntax.
    floatn = decn + r'\.e?' + decn
    # Double-quoted, single-quoted or backquoted string on a single line
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'(r[0-9][0-5]?[bwd]?|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]|k[0-7]|'
                r'[xyz]mm(?:[12][0-9]?|3[01]?|[04-9]))\b')
    wordop = r'seg|wrt|strict|rel|abs'
    type = r'byte|[dq]?word'
    # Directives must be followed by whitespace, otherwise CPU will match
    # cpuid for instance.
    directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
                  r'EXPORT|LIBRARY|MODULE)(?=\s)')
# Die Informationen auf dieser Webseite wurden
# nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
# noch Qualität der bereitgestellten Informationen zugesichert.
# Bemerkung:
# Die farbliche Syntaxdarstellung ist noch experimentell.