# A character constant is a sequence of the form #s, where s is a string # constant denoting a string of size one character. This setup just parses # the entire string as either a String.Double or a String.Char (depending # on the argument), even if the String.Char is an erroneous # multiple-character string. def stringy(whatkind): return [
(r'[^"\\]', whatkind),
(r'\\[\\"abtnvfr]', String.Escape), # Control-character notation is used for codes < 32, # where \^@ == \000
(r'\\\^[\x40-\x5e]', String.Escape), # Docs say 'decimal digits'
(r'\\[0-9]{3}', String.Escape),
(r'\\u[0-9a-fA-F]{4}', String.Escape),
(r'\\\s+\\', String.Interpol),
(r'"', whatkind, '#pop'),
]
# Callbacks for distinguishing tokens and reserved words def long_id_callback(self, match): if match.group(1) in self.alphanumid_reserved:
token = Error else:
token = Name.Namespace yield match.start(1), token, match.group(1) yield match.start(2), Punctuation, match.group(2)
def end_id_callback(self, match): if match.group(1) in self.alphanumid_reserved:
token = Error elif match.group(1) in self.symbolicid_reserved:
token = Error else:
token = Name yield match.start(1), token, match.group(1)
def id_callback(self, match):
str = match.group(1) if str in self.alphanumid_reserved:
token = Keyword.Reserved elif str in self.symbolicid_reserved:
token = Punctuation else:
token = Name yield match.start(1), token, str
tokens = { # Whitespace and comments are (almost) everywhere 'whitespace': [
(r'\s+', Text),
(r'\(\*', Comment.Multiline, 'comment'),
],
'delimiters': [ # This lexer treats these delimiters specially: # Delimiters define scopes, and the scope is how the meaning of # the `|' is resolved - is it a case/handle expression, or function # definition by cases? (This is not how the Definition works, but # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
(r'\(|\[|\{', Punctuation, 'main'),
(r'\)|\]|\}', Punctuation, '#pop'),
(r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
(r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
(r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
],
'core': [ # Punctuation that doesn't overlap symbolic identifiers
(r'({})'.format('|'.join(re.escape(z) for z in nonid_reserved)),
Punctuation),
# Special constants: strings, floats, numbers in decimal and hex
(r'#"', String.Char, 'char'),
(r'"', String.Double, 'string'),
(r'~?0x[0-9a-fA-F]+', Number.Hex),
(r'0wx[0-9a-fA-F]+', Number.Hex),
(r'0w\d+', Number.Integer),
(r'~?\d+\.\d+[eE]~?\d+', Number.Float),
(r'~?\d+\.\d+', Number.Float),
(r'~?\d+[eE]~?\d+', Number.Float),
(r'~?\d+', Number.Integer),
# Labels
(r'#\s*[1-9][0-9]*', Name.Label),
(rf'#\s*({alphanumid_re})', Name.Label),
(rf'#\s+({symbolicid_re})', Name.Label), # Some reserved words trigger a special, local lexer state change
(r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
(r'\b(exception)\b(?!\')', Keyword.Reserved, 'ename'),
(r'\b(functor|include|open|signature|structure)\b(?!\')',
Keyword.Reserved, 'sname'),
(r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),
# Main parser (prevents errors in files that have scoping errors) 'root': [
default('main')
],
# In this scope, I expect '|' to not be followed by a function name, # and I expect 'and' to be followed by a binding site 'main': [
include('whitespace'),
# Special behavior of val/and/fun
(r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
(r'\b(fun)\b(?!\')', Keyword.Reserved,
('#pop', 'main-fun', 'fname')),
# Special behavior of val/and/fun
(r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
(r'\b(val)\b(?!\')', Keyword.Reserved,
('#pop', 'main', 'vname')),
# Special behavior of '|' and '|'-manipulating keywords
(r'\|', Punctuation, 'fname'),
(r'\b(case|handle)\b(?!\')', Keyword.Reserved,
('#pop', 'main')),
# Dealing with what comes after the 'fun' (or 'and' or '|') keyword 'fname': [
include('whitespace'),
(r'\'[\w\']*', Name.Decorator),
(r'\(', Punctuation, 'tyvarseq'),
# Ignore interesting function declarations like "fun (x + y) = ..."
default('#pop'),
],
# Dealing with what comes after the 'val' (or 'and') keyword 'vname': [
include('whitespace'),
(r'\'[\w\']*', Name.Decorator),
(r'\(', Punctuation, 'tyvarseq'),
(r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
String.Char),
(r"'.'", String.Char),
(r"'", Keyword), # a stray quote is another syntax element
# most of these aren't strictly keywords # but if you color only real keywords, you might just # as well not color anything
keywords = ( 'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do', 'else', 'end', 'external', 'forall', 'function', 'if', 'import', 'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then', 'type', 'val', 'with', 'xml_parser',
)
# matches both stuff and `stuff`
ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'
op_re = r'[.=\-<>,@~%/+?*&^!]'
punc_re = r'[()\[\],;|]'# '{' and '}' are treated elsewhere # because they are also used for inserts
tokens = { # copied from the caml lexer, should be adapted 'escape-sequence': [
(r'\\[\\"\'ntr}]', String.Escape),
(r'\\[0-9]{3}', String.Escape),
(r'\\x[0-9a-fA-F]{2}', String.Escape),
],
# factorizing these rules, because they are inserted many times 'comments': [
(r'/\*', Comment, 'nested-comment'),
(r'//.*?$', Comment),
], 'comments-and-spaces': [
include('comments'),
(r'\s+', Text),
],
'root': [
include('comments-and-spaces'), # keywords
(words(keywords, prefix=r'\b', suffix=r'\b'), Keyword), # directives # we could parse the actual set of directives instead of anything # starting with @, but this is troublesome # because it needs to be adjusted all the time # and assuming we parse only sources that compile, it is useless
(r'@' + ident_re + r'\b', Name.Builtin.Pseudo),
# string literals
(r'"', String.Double, 'string'), # char literal, should be checked because this is the regexp from # the caml lexer
(r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
String.Char),
# this is meant to deal with embedded exprs in strings # every time we find a '}' we pop a state so that if we were # inside a string, we are back in the string state # as a consequence, we must also push a state every time we find a # '{' or else we will have errors when parsing {} for instance
(r'\{', Operator, '#push'),
(r'\}', Operator, '#pop'),
# html literals # this is a much more strict that the actual parser, # since a<b would not be parsed as html # but then again, the parser is way too lax, and we can't hope # to have something as tolerant
(r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),
# db path # matching the '[_]' in '/a[_]' because it is a part # of the syntax of the db path definition # unfortunately, i don't know how to match the ']' in # /a[1], so this is somewhat inconsistent
(r'[@?!]?(/\w+)+(\[_\])?', Name.Variable), # putting the same color on <- as on db path, since # it can be used only to mean Db.write
(r'<-(?!'+op_re+r')', Name.Variable),
# 'modules' # although modules are not distinguished by their names as in caml # the standard library seems to follow the convention that modules # only area capitalized
(r'\b([A-Z]\w*)(?=\.)', Name.Namespace),
# operators # = has a special role because this is the only # way to syntactic distinguish binding constructions # unfortunately, this colors the equal in {x=2} too
(r'=(?!'+op_re+r')', Keyword),
(rf'({op_re})+', Operator),
(rf'({punc_re})+', Operator),
# coercions
(r':', Operator, 'type'), # type variables # we need this rule because we don't parse specially type # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
("'"+ident_re, Keyword.Type),
# id literal, #something, or #{expr}
(r'#'+ident_re, String.Single),
(r'#(?=\{)', String.Single),
# identifiers # this avoids to color '2' in 'a2' as an integer
(ident_re, Text),
# default, not sure if that is needed or not # (r'.', Text),
],
# it is quite painful to have to parse types to know where they end # this is the general rule for a type # a type is either: # * -> ty # * type-with-slash # * type-with-slash -> ty # * type-with-slash (, type-with-slash)+ -> ty # # the code is pretty funky in here, but this code would roughly # translate in caml to: # let rec type stream = # match stream with # | [< "->"; stream >] -> type stream # | [< ""; stream >] -> # type_with_slash stream # type_lhs_1 stream; # and type_1 stream = ... 'type': [
include('comments-and-spaces'),
(r'->', Keyword.Type),
default(('#pop', 'type-lhs-1', 'type-with-slash')),
],
# parses all the atomic or closed constructions in the syntax of type # expressions: record types, tuple types, type constructors, basic type # and type variables 'type-1': [
include('comments-and-spaces'),
(r'\(', Keyword.Type, ('#pop', 'type-tuple')),
(r'~?\{', Keyword.Type, ('#pop', 'type-record')),
(ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
(ident_re, Keyword.Type, '#pop'),
("'"+ident_re, Keyword.Type), # this case is not in the syntax but sometimes # we think we are parsing types when in fact we are parsing # some css, so we just pop the states until we get back into # the root state
default('#pop'),
],
# we go in this state after having parsed a type-with-slash # while trying to parse a type # and at this point we must determine if we are parsing an arrow # type (in which case we must continue parsing) or not (in which # case we stop) 'type-lhs-1': [
include('comments-and-spaces'),
(r'->', Keyword.Type, ('#pop', 'type')),
(r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
default('#pop'),
], 'type-arrow': [
include('comments-and-spaces'), # the look ahead here allows to parse f(x : int, y : float -> truc) # correctly
(r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
(r'->', Keyword.Type, ('#pop', 'type')), # same remark as above
default('#pop'),
],
# no need to do precise parsing for tuples and records # because they are closed constructions, so we can simply # find the closing delimiter # note that this function would be not work if the source # contained identifiers like `{)` (although it could be patched # to support it) 'type-tuple': [
include('comments-and-spaces'),
(r'[^()/*]+', Keyword.Type),
(r'[/*]', Keyword.Type),
(r'\(', Keyword.Type, '#push'),
(r'\)', Keyword.Type, '#pop'),
], 'type-record': [
include('comments-and-spaces'),
(r'[^{}/*]+', Keyword.Type),
(r'[/*]', Keyword.Type),
(r'\{', Keyword.Type, '#push'),
(r'\}', Keyword.Type, '#pop'),
],
# the copy pasting between string and single-string # is kinda sad. Is there a way to avoid that?? 'string': [
(r'[^\\"{]+', String.Double),
(r'"', String.Double, '#pop'),
(r'\{', Operator, 'root'),
include('escape-sequence'),
], 'single-string': [
(r'[^\\\'{]+', String.Double),
(r'\'', String.Double, '#pop'),
(r'\{', Operator, 'root'),
include('escape-sequence'),
],
# all the html stuff # can't really reuse some existing html parser # because we must be able to parse embedded expressions
# we are in this state after someone parsed the '<' that # started the html literal 'html-open-tag': [
(r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
(r'>', String.Single, ('#pop', 'html-content')),
],
# we are in this state after someone parsed the '</' that # started the end of the closing tag 'html-end-tag': [ # this is a star, because </> is allowed
(r'[\w\-:]*>', String.Single, '#pop'),
],
# we are in this state after having parsed '<ident(:ident)?' # we thus parse a possibly empty list of attributes 'html-attr': [
(r'\s+', Text),
(r'[\w\-:]+=', String.Single, 'html-attr-value'),
(r'/>', String.Single, '#pop'),
(r'>', String.Single, ('#pop', 'html-content')),
],
(r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
String.Char),
(r"'.'", String.Char),
(r"'", Keyword), # a stray quote is another syntax element
(r"\`([\w\'.]+)\`", Operator.Word), # for infix applications
(r"\`", Keyword), # for quoting
(r'"', String.Double, 'string'),
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.0.24Bemerkung:
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.