:copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details. """
import re
from pygments.lexer import RegexLexer, include from pygments.util import get_bool_opt, get_list_opt from pygments.token import Text, Comment, Operator, Keyword, Name, \
String, Number, Punctuation, Error
__all__ = ['Modula2Lexer']
# Multi-Dialect Modula-2 Lexer class Modula2Lexer(RegexLexer): """ For Modula-2 source code.
The Modula-2 lexer supports several dialects. By default, it operates in
fallback mode, recognising the *combined* literals, punctuation symbols and operators of all supported dialects, and the *combined* reserved words and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not
differentiating between library defined identifiers.
To select a specific dialect, a dialect option may be passed or a dialect tag may be embedded into a source file.
The PIM and ISO dialect options may be qualified with a language extension.
Language Extensions:
`+aglet`
Select Aglet Modula-2 extensions, available with m2iso.
`+gm2`
Select GNU Modula-2 extensions, available with m2pim.
`+p1`
Select p1 Modula-2 extensions, available with m2iso.
`+xds`
Select XDS Modula-2 extensions, available with m2iso.
Passing a Dialect Option via Unix Commandline Interface
Dialect options may be passed to the lexer using the `dialect` key.
Only one such option should be passed. If multiple dialect options are
passed, the first valid option is used, any subsequent options are ignored.
Examples:
`$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input`
Use ISO dialect to render input to HTML output
`$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input`
Use ISO dialect with p1 extensions to render input to RTF output
Embedding a Dialect Option within a source file
A dialect option may be embedded in a source file in form of a dialect
tag, a specially formatted comment that specifies a dialect option.
No whitespace is permitted between the tokens of a dialect tag.
In the event that a source file contains multiple dialect tags, the first
tag that contains a valid dialect option will be used and any subsequent
dialect tags will be ignored. Ideally, a dialect tag should be placed
at the beginning of a source file.
An embedded dialect tag overrides a dialect option set via command line.
Examples:
``(*!m2r10*) DEFINITION MODULE Foobar; ...``
Use Modula-2 R10 dialect to render this source file.
``(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...``
Use PIM dialect with GNU extensions to render this source file.
Algol Publication Mode:
In Algol publication mode, source text is rendered for publication of
algorithms in scientific papers and academic texts, following the format
of the Revised Algol-60 Language Report. It is activated by passing
one of two corresponding styles as an option:
`algol`
render reserved words lowercase underline boldface and builtins lowercase boldface italic
`algol_nu`
render reserved words lowercase boldface (no underlining) and builtins lowercase boldface italic
The lexer automatically performs the required lowercase conversion when
this mode is activated.
Example:
``$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input``
Render input file in Algol publication mode to LaTeX output.
Rendering Mode of First Class ADT Identifiers:
The rendering of standard library first class ADT identifiers is controlled
by option flag "treat_stdlib_adts_as_builtins".
When this option is turned on, standard library ADT identifiers are rendered as builtins. When it is turned off, they are rendered as ordinary library
identifiers.
`treat_stdlib_adts_as_builtins` (default: On)
The option is useful for dialects that support ADTs as first class objects and provide ADTs in the standard library that would otherwise be built-in.
At present, only Modula-2 R10 supports library ADTs as first class objects and therefore, no ADT identifiers are defined for any other dialects.
Example:
``$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...``
Render standard library ADTs as ordinary library types.
# Lexemes to Mark as Error Tokens for Modula-2 R10
# (registered under the R10 dialect in the reject-lexemes database)
m2r10_lexemes_to_reject = (
    '!',
    '`',
    '@',
    '$',
    '%',
    '&',
    '<>',
)
# Modula-2 R10 reserved words in addition to the common set
m2r10_additional_reserved_words = (
    # 12 additional reserved words
    'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE',
    'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN',
    # 2 additional reserved words with symbolic assembly option
    'ASM', 'REG',
)
# Modula-2 R10 builtins in addition to the common set
m2r10_additional_builtins = (
    # 26 additional builtins
    'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD',
    'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT',
    'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE',
    'UNICHAR', 'WRITE', 'WRITEF',
)
# Lexemes to Mark as Errors Database
#
# Maps a dialect id to the tuple of lexeme lists that set_dialect() merges
# into the set of lexemes marked as Error tokens for that dialect.
lexemes_to_reject_db = {
    # Lexemes to reject for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Lexemes to reject for PIM Modula-2
    'm2pim': (
        pim_lexemes_to_reject,
    ),
    # Lexemes to reject for ISO Modula-2
    'm2iso': (
        iso_lexemes_to_reject,
    ),
    # Lexemes to reject for Modula-2 R10
    'm2r10': (
        m2r10_lexemes_to_reject,
    ),
    # Lexemes to reject for Objective Modula-2
    'objm2': (
        objm2_lexemes_to_reject,
    ),
    # Lexemes to reject for Aglet Modula-2
    'm2iso+aglet': (
        iso_lexemes_to_reject,
    ),
    # Lexemes to reject for GNU Modula-2
    'm2pim+gm2': (
        pim_lexemes_to_reject,
    ),
    # Lexemes to reject for p1 Modula-2
    'm2iso+p1': (
        iso_lexemes_to_reject,
    ),
    # Lexemes to reject for XDS Modula-2
    'm2iso+xds': (
        iso_lexemes_to_reject,
    ),
}
# Reserved Words Database
#
# Maps a dialect id to the tuple of reserved word lists that set_dialect()
# merges into the reserved word set for that dialect.
reserved_words_db = {
    # Reserved words for unknown dialect
    # (the combined PIM, ISO and R10 reserved words)
    'unknown': (
        common_reserved_words,
        pim_additional_reserved_words,
        iso_additional_reserved_words,
        m2r10_additional_reserved_words,
    ),

    # Reserved words for PIM Modula-2
    'm2pim': (
        common_reserved_words,
        pim_additional_reserved_words,
    ),

    # Reserved words for ISO Modula-2
    'm2iso': (
        common_reserved_words,
        iso_additional_reserved_words,
    ),

    # Reserved words for Modula-2 R10
    'm2r10': (
        common_reserved_words,
        m2r10_additional_reserved_words,
    ),

    # Reserved words for Objective Modula-2
    'objm2': (
        common_reserved_words,
        m2r10_additional_reserved_words,
        objm2_additional_reserved_words,
    ),

    # Reserved words for Aglet Modula-2 Extensions
    'm2iso+aglet': (
        common_reserved_words,
        iso_additional_reserved_words,
        aglet_additional_reserved_words,
    ),

    # Reserved words for GNU Modula-2 Extensions
    'm2pim+gm2': (
        common_reserved_words,
        pim_additional_reserved_words,
        gm2_additional_reserved_words,
    ),

    # Reserved words for p1 Modula-2 Extensions
    'm2iso+p1': (
        common_reserved_words,
        iso_additional_reserved_words,
        p1_additional_reserved_words,
    ),

    # Reserved words for XDS Modula-2 Extensions
    'm2iso+xds': (
        common_reserved_words,
        iso_additional_reserved_words,
        xds_additional_reserved_words,
    ),
}
# Standard Library ADTs Database
#
# Maps a dialect id to the tuple of ADT identifier lists that set_dialect()
# merges into the ADT set for that dialect.
stdlib_adts_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library ADTs for PIM Modula-2
    'm2pim': (
        # No first class library types
    ),

    # Standard Library ADTs for ISO Modula-2
    'm2iso': (
        # No first class library types
    ),

    # Standard Library ADTs for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_adt_identifiers,
    ),

    # Standard Library ADTs for Objective Modula-2
    'objm2': (
        m2r10_stdlib_adt_identifiers,
    ),

    # Standard Library ADTs for Aglet Modula-2
    'm2iso+aglet': (
        # No first class library types
    ),

    # Standard Library ADTs for GNU Modula-2
    'm2pim+gm2': (
        # No first class library types
    ),

    # Standard Library ADTs for p1 Modula-2
    'm2iso+p1': (
        # No first class library types
    ),

    # Standard Library ADTs for XDS Modula-2
    'm2iso+xds': (
        # No first class library types
    ),
}
# Standard Library Modules Database
#
# Maps a dialect id to the tuple of module identifier lists that
# set_dialect() merges into the module set for that dialect.
stdlib_modules_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Modules for PIM Modula-2
    'm2pim': (
        pim_stdlib_module_identifiers,
    ),

    # Standard Library Modules for ISO Modula-2
    'm2iso': (
        iso_stdlib_module_identifiers,
    ),

    # Standard Library Modules for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_blueprint_identifiers,
        m2r10_stdlib_module_identifiers,
        m2r10_stdlib_adt_identifiers,
    ),

    # Standard Library Modules for Objective Modula-2
    # NOTE(review): unlike 'm2r10', the ADT identifiers are not listed here
    # -- confirm whether this asymmetry is intentional.
    'objm2': (
        m2r10_stdlib_blueprint_identifiers,
        m2r10_stdlib_module_identifiers,
    ),

    # Standard Library Modules for Aglet Modula-2
    'm2iso+aglet': (
        iso_stdlib_module_identifiers,
    ),

    # Standard Library Modules for GNU Modula-2
    'm2pim+gm2': (
        pim_stdlib_module_identifiers,
    ),

    # Standard Library Modules for p1 Modula-2
    'm2iso+p1': (
        iso_stdlib_module_identifiers,
    ),

    # Standard Library Modules for XDS Modula-2
    'm2iso+xds': (
        iso_stdlib_module_identifiers,
    ),
}
# Standard Library Types Database
#
# Maps a dialect id to the tuple of type identifier lists that set_dialect()
# merges into the type set for that dialect.
stdlib_types_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Types for PIM Modula-2
    'm2pim': (
        pim_stdlib_type_identifiers,
    ),

    # Standard Library Types for ISO Modula-2
    'm2iso': (
        iso_stdlib_type_identifiers,
    ),

    # Standard Library Types for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_type_identifiers,
    ),

    # Standard Library Types for Objective Modula-2
    'objm2': (
        m2r10_stdlib_type_identifiers,
    ),

    # Standard Library Types for Aglet Modula-2
    'm2iso+aglet': (
        iso_stdlib_type_identifiers,
    ),

    # Standard Library Types for GNU Modula-2
    'm2pim+gm2': (
        pim_stdlib_type_identifiers,
    ),

    # Standard Library Types for p1 Modula-2
    'm2iso+p1': (
        iso_stdlib_type_identifiers,
    ),

    # Standard Library Types for XDS Modula-2
    'm2iso+xds': (
        iso_stdlib_type_identifiers,
    ),
}
# Standard Library Procedures Database
#
# Maps a dialect id to the tuple of procedure identifier lists that
# set_dialect() merges into the procedure set for that dialect.
stdlib_procedures_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Procedures for PIM Modula-2
    'm2pim': (
        pim_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for ISO Modula-2
    'm2iso': (
        iso_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for Objective Modula-2
    'objm2': (
        m2r10_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for Aglet Modula-2
    'm2iso+aglet': (
        iso_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for GNU Modula-2
    'm2pim+gm2': (
        pim_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for p1 Modula-2
    'm2iso+p1': (
        iso_stdlib_proc_identifiers,
    ),

    # Standard Library Procedures for XDS Modula-2
    'm2iso+xds': (
        iso_stdlib_proc_identifiers,
    ),
}
# Standard Library Variables Database
#
# Maps a dialect id to the tuple of variable identifier lists that
# set_dialect() merges into the variable set for that dialect.
stdlib_variables_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Variables for PIM Modula-2
    'm2pim': (
        pim_stdlib_var_identifiers,
    ),

    # Standard Library Variables for ISO Modula-2
    'm2iso': (
        iso_stdlib_var_identifiers,
    ),

    # Standard Library Variables for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_var_identifiers,
    ),

    # Standard Library Variables for Objective Modula-2
    'objm2': (
        m2r10_stdlib_var_identifiers,
    ),

    # Standard Library Variables for Aglet Modula-2
    'm2iso+aglet': (
        iso_stdlib_var_identifiers,
    ),

    # Standard Library Variables for GNU Modula-2
    'm2pim+gm2': (
        pim_stdlib_var_identifiers,
    ),

    # Standard Library Variables for p1 Modula-2
    'm2iso+p1': (
        iso_stdlib_var_identifiers,
    ),

    # Standard Library Variables for XDS Modula-2
    'm2iso+xds': (
        iso_stdlib_var_identifiers,
    ),
}
# Standard Library Constants Database
#
# Maps a dialect id to the tuple of constant identifier lists that
# set_dialect() merges into the constant set for that dialect.
stdlib_constants_db = {
    # Empty entry for unknown dialect
    'unknown': (
        # LEAVE THIS EMPTY
    ),
    # Standard Library Constants for PIM Modula-2
    'm2pim': (
        pim_stdlib_const_identifiers,
    ),

    # Standard Library Constants for ISO Modula-2
    'm2iso': (
        iso_stdlib_const_identifiers,
    ),

    # Standard Library Constants for Modula-2 R10
    'm2r10': (
        m2r10_stdlib_const_identifiers,
    ),

    # Standard Library Constants for Objective Modula-2
    'objm2': (
        m2r10_stdlib_const_identifiers,
    ),

    # Standard Library Constants for Aglet Modula-2
    'm2iso+aglet': (
        iso_stdlib_const_identifiers,
    ),

    # Standard Library Constants for GNU Modula-2
    'm2pim+gm2': (
        pim_stdlib_const_identifiers,
    ),

    # Standard Library Constants for p1 Modula-2
    'm2iso+p1': (
        iso_stdlib_const_identifiers,
    ),

    # Standard Library Constants for XDS Modula-2
    'm2iso+xds': (
        iso_stdlib_const_identifiers,
    ),
}
# Set lexer to a specified dialect
def set_dialect(self, dialect_id):
    """Compose and install the lookup sets for the dialect ``dialect_id``.

    If ``dialect_id`` is not contained in ``self.dialects`` the lexer falls
    back to the ``'unknown'`` dialect.  For the selected dialect, every
    list registered in the per-category databases is merged into one set
    per category:

    * reserved words and lexemes to reject are taken as they are,
    * builtins, pseudo-builtins and ADTs exclude the reserved words,
    * modules, types, procedures, variables and constants exclude the
      builtins.

    The resulting sets are stored on the instance (``self.dialect``,
    ``self.lexemes_to_reject``, ``self.reserved_words``, ``self.builtins``,
    ``self.pseudo_builtins``, ``self.adts``, ``self.modules``,
    ``self.types``, ``self.procedures``, ``self.variables`` and
    ``self.constants``).  Nothing is returned.
    """
    # check dialect name against known dialects
    dialect = dialect_id if dialect_id in self.dialects else 'unknown'

    def merged(database, excluded=frozenset()):
        # union of all lists registered for the dialect in `database`,
        # leaving out every entry that is contained in `excluded`
        result = set()
        for entries in database[dialect]:
            result.update(set(entries).difference(excluded))
        return result

    # compose all sets first, so that a failing lookup cannot leave the
    # lexer with a partially updated state
    lexemes_to_reject_set = merged(self.lexemes_to_reject_db)
    reswords_set = merged(self.reserved_words_db)
    builtins_set = merged(self.builtins_db, reswords_set)
    pseudo_builtins_set = merged(self.pseudo_builtins_db, reswords_set)
    adts_set = merged(self.stdlib_adts_db, reswords_set)
    modules_set = merged(self.stdlib_modules_db, builtins_set)
    types_set = merged(self.stdlib_types_db, builtins_set)
    procedures_set = merged(self.stdlib_procedures_db, builtins_set)
    variables_set = merged(self.stdlib_variables_db, builtins_set)
    constants_set = merged(self.stdlib_constants_db, builtins_set)

    # update lexer state
    self.dialect = dialect
    self.lexemes_to_reject = lexemes_to_reject_set
    self.reserved_words = reswords_set
    self.builtins = builtins_set
    self.pseudo_builtins = pseudo_builtins_set
    self.adts = adts_set
    self.modules = modules_set
    self.types = types_set
    self.procedures = procedures_set
    self.variables = variables_set
    self.constants = constants_set
# Extracts a dialect name from a dialect tag comment string and checks
# the extracted name against known dialects.  If a match is found, the
# matching name is returned, otherwise dialect id 'unknown' is returned
def get_dialect_from_dialect_tag(self, dialect_tag):
    """Return the dialect id named by ``dialect_tag``, or ``'unknown'``.

    A dialect tag has the form ``(*!<indicator>*)`` with a non-empty
    indicator.  The indicator is returned when it equals one of the
    entries of ``self.dialects`` other than the first one; in every other
    case (malformed tag, empty indicator, no match) ``'unknown'`` is
    returned.
    """
    left_tag_delim = '(*!'
    right_tag_delim = '*)'

    # a valid tag is strictly longer than its two delimiters together
    if (len(dialect_tag) <= len(left_tag_delim) + len(right_tag_delim)
            or not dialect_tag.startswith(left_tag_delim)
            or not dialect_tag.endswith(right_tag_delim)):
        # invalid indicator string
        return 'unknown'

    # extract dialect indicator
    indicator = dialect_tag[len(left_tag_delim):-len(right_tag_delim)]

    # check against known dialects; the entry at index 0 is never offered
    # as a match (presumably the 'unknown' placeholder -- confirm against
    # the definition of self.dialects)
    if indicator in self.dialects[1:]:
        return indicator

    # indicator does not match any dialect
    return 'unknown'
# intercept the token stream, modify token attributes and return them
def get_tokens_unprocessed(self, text):
    """Yield ``(index, token, value)`` triples adjusted for the dialect.

    The triples produced by ``RegexLexer.get_tokens_unprocessed`` are
    post-processed one by one:

    * the first ``Comment.Special`` token that carries a valid dialect tag
      switches the lexer to that dialect (once per lexer instance),
    * plain ``Name`` tokens are re-classified using the sets composed by
      ``set_dialect``,
    * number literals, single line comments and pragmas that are not
      valid in the active dialect are marked as ``Error``,
    * all remaining tokens are checked against the dialect's reject set
      and, in Algol publication mode, a few operators are replaced by
      their typographic equivalents.
    """
    # operator substitutions applied in Algol publication mode
    algol_substitutions = {
        '#': '≠',
        '<=': '≤',
        '>=': '≥',
        '==': '≡',
        '*.': '•',
    }

    for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):

        # check for dialect tag if dialect has not been set by tag
        if not self.dialect_set_by_tag and token == Comment.Special:
            indicated_dialect = self.get_dialect_from_dialect_tag(value)
            if indicated_dialect != 'unknown':
                # token is a dialect indicator
                # reset reserved words and builtins
                self.set_dialect(indicated_dialect)
                self.dialect_set_by_tag = True

        # check for reserved words, predefined and stdlib identifiers
        if token is Name:
            if value in self.reserved_words:
                token = Keyword.Reserved
                if self.algol_publication_mode:
                    value = value.lower()

            elif value in self.builtins:
                token = Name.Builtin
                if self.algol_publication_mode:
                    value = value.lower()

            elif value in self.pseudo_builtins:
                token = Name.Builtin.Pseudo
                if self.algol_publication_mode:
                    value = value.lower()

            elif value in self.adts:
                if not self.treat_stdlib_adts_as_builtins:
                    token = Name.Namespace
                else:
                    token = Name.Builtin.Pseudo
                    if self.algol_publication_mode:
                        value = value.lower()

            elif value in self.modules:
                token = Name.Namespace

            elif value in self.types:
                token = Name.Class

            elif value in self.procedures:
                token = Name.Function

            elif value in self.variables:
                token = Name.Variable

            elif value in self.constants:
                token = Name.Constant

        elif token in Number:

            # mark prefix number literals as error for PIM and ISO dialects
            if self.dialect not in ('unknown', 'm2r10', 'objm2'):
                if "'" in value or value[0:2] in ('0b', '0x', '0u'):
                    token = Error

            elif self.dialect in ('m2r10', 'objm2'):
                # mark base-8 number literals as errors for M2 R10 and ObjM2
                if token is Number.Oct:
                    token = Error
                # mark suffix base-16 literals as errors for M2 R10 and ObjM2
                elif token is Number.Hex and 'H' in value:
                    token = Error
                # mark real numbers with E as errors for M2 R10 and ObjM2
                elif token is Number.Float and 'E' in value:
                    token = Error

        elif token in Comment:

            # mark single line comment as error for PIM and ISO dialects
            if token is Comment.Single:
                if self.dialect not in ('unknown', 'm2r10', 'objm2'):
                    token = Error

            if token is Comment.Preproc:
                # mark ISO pragma as error for PIM dialects
                if value.startswith('<*') and \
                   self.dialect.startswith('m2pim'):
                    token = Error
                # mark PIM pragma as comment for other dialects
                elif value.startswith('(*$') and \
                        self.dialect != 'unknown' and \
                        not self.dialect.startswith('m2pim'):
                    token = Comment.Multiline

        else:  # token is neither Name, Number nor Comment

            # mark lexemes matching the dialect's error token set as errors
            if value in self.lexemes_to_reject:
                token = Error

            # substitute lexemes when in Algol mode
            if self.algol_publication_mode:
                value = algol_substitutions.get(value, value)

        # return result
        yield index, token, value
def analyse_text(text):
    """It's Pascal-like, but does not use FUNCTION -- uses PROCEDURE
    instead.

    Returns ``None`` when ``text`` lacks any of ``(*``, ``*)`` and ``:=``.
    Otherwise returns ``0.6`` if the word ``PROCEDURE`` occurs and ``0``
    if it does not; any occurrence of the word ``FUNCTION`` forces the
    result to ``0.0``.
    """
    # Check if this looks like Pascal, if not, bail out early
    if not ('(*' in text and '*)' in text and ':=' in text):
        return None

    result = 0
    # Procedure is in Modula2
    if re.search(r'\bPROCEDURE\b', text):
        result += 0.6

    # FUNCTION is only valid in Pascal, but not in Modula2
    if re.search(r'\bFUNCTION\b', text):
        result = 0.0

    return result
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.