/****************************************************************************
**
**  This file is part of GAP, a system for computational discrete algebra.
**
**  Copyright of GAP belongs to its developers, whose names are too numerous
**  to list here. Please refer to the COPYRIGHT file for details.
**
**  SPDX-License-Identifier: GPL-2.0-or-later
**
**  This file declares the functions of the scanner, which provides a very
**  useful abstraction, namely the concept that an input file is a stream of
**  symbols, so that such nasty things as <space>, <tab>, <newline>
**  characters or comments (they are worst :-), characters making up
**  identifiers or digits that make up integers are hidden from the rest of
**  GAP.
*/
// Include guard: each preprocessor directive must be on its own line,
// otherwise 'GAP_SCANNER_H' is never defined and the header is not protected
// against repeated inclusion. The matching '#endif' closes the header.
#ifndef GAP_SCANNER_H
#define GAP_SCANNER_H
#include"common.h"
// Size of the 'Value' buffer in 'ScannerState'. Identifiers are limited to
// one character less than this (1023 characters), which leaves room for the
// terminating NUL of the C string.
enum { MAX_VALUE_LEN = 1024 };
/****************************************************************************
**
*V  Symbol  . . . . . . . . . . . . . . . . . current symbol read from input
**
**  The variable 'Symbol' contains the current symbol read from the input.
**  It is represented as a uintptr_t.
**
**  The possible values for 'Symbol' are defined in the definition file of
**  this package as follows:
*/ enum SCANNER_SYMBOLS {
S_ILLEGAL = 0,
/****************************************************************************
**
*T  TypSymbolSet  . . . . . . . . . . . . . . . . .  type of sets of symbols
**
**  'TypSymbolSet' is the type of sets of symbols.  Sets of symbols are used
**  in the error recovery of the parser to specify that 'Match' should skip
**  all symbols until finding one in a specified set.
**
**  If there were fewer than 32 different symbols things would be very easy.
**  We could simply assign the symbolic constants that are the possible
**  values for 'Symbol' the values 1, 2, 4, 8, 16, ... and so on.  Making a
**  set would then simply mean or-ing the values, as in 'S_INT|S_STRING',
**  and checking whether a symbol is in a set would be
**  '(<symbol> & <set>) != 0'.
**
**  There are however more than 32 different symbols, so we must be more
**  clever.  We group symbols that are syntactically equivalent, like '*'
**  and '/', into a class.  The least significant 3 bits differentiate
**  between the members of one class, and every symbol class (many of which
**  contain just one symbol) has exactly one of the remaining, more
**  significant bits set (29 of them when a set is 32 bits wide).  Thus sets
**  of symbols are represented as unsigned integers (described as a
**  uintptr_t; the code uses 'UInt'), typedef-ed to 'TypSymbolSet'.
**
**  The classes are as follows; all other symbols are in a class by
**  themselves:
**      identifiers, IsBound, Unbind, Info, Assert
**      if, for, repeat, while, return
**      elif, else
**      not, and, or
**      =, <>, <, >=, <=, >, in
**      +, -
**      *, /, mod, ^
*/
typedef UInt TypSymbolSet;
/****************************************************************************
**
*F  IS_IN( <symbol>, <set> )  . . . . . . . . is a symbol in a set of symbols
**
**  'IS_IN' yields a non-zero value if the symbol <symbol> is in the symbol
**  set <set>, and 0 otherwise.  The low 3 bits of <set> only distinguish
**  members within a class, so they are masked out and only the class bits
**  are compared.  Because of this grouping into classes a symbol set may
**  contain more symbols than were mentioned when it was built.  For example
**  'IS_IN(S_POW,S_MULT|S_DIV|S_MOD)' is also non-zero, since '^' is in the
**  same class as '*', '/' and 'mod'.
*/
#define IS_IN(symbol, set) (((set) & ~7) & (symbol))
/****************************************************************************
**
*V  EXPRBEGIN . . . . . . . . . . . . set of symbols that start an expression
*V  STATBEGIN . . . . . . . . . . . . . set of symbols that start a statement
**
**  'EXPRBEGIN' is the set of symbols that might start an expression.
**
**  'STATBEGIN' is the set of symbols that might start a statement.
*/
#define EXPRBEGIN \
    (S_IDENT | S_ISBOUND | S_INT | S_TRUE | S_FALSE | S_TILDE | \
     S_CHAR | S_STRING | S_LBRACK | S_REC | S_FUNCTION | \
     S_PLUS | S_MINUS | S_NOT | S_LPAREN)
/**************************************************************************** ** *T ScannerState ** ** The struct 'ScannerState' encapsulates the state of the scanner.
*/ typedefstruct {
//
TypInputFile * input;
// If 'Symbol' is 'S_IDENT', 'S_INT' or 'S_FLOAT' then normally the // variable 'Value' holds the name of the identifier or the digits of the // integer or float literal as a C string. For large integer or float // literals that do not fit into 'Value', instead 'ValueObj' holds the // literal as a GAP string object. // // Note that the size of identifiers in GAP is limited to 1023 characters, // hence identifiers are always stored in 'Value'. For this reason, // 'GetIdent' truncates an identifier after that many characters. char Value[MAX_VALUE_LEN];
// For large integer or float literals that do not fit into 'Value', // instead 'ValueObj' holds the literal as a GAP string object. If the // symbol is 'S_STRING' or 'S_HELP', the string literal or help text is // always stored in 'ValueObj' as a GAP string object.
Obj ValueObj;
// enum SCANNER_SYMBOLS Symbol;
// Track the last three symbols, for 'Unbound global' warnings
UInt SymbolStartPos[3];
UInt SymbolStartLine[3];
// 'NrError' is an integer whose value is the number of errors already // found in the current expression. It is set to 0 at the beginning of // 'Read' and incremented with each 'SyntaxError' call, including those // from 'Match'. // // If 'NrError' is greater than zero the parser functions will not create // new bags. This prevents the parser from creating new bags after an // error occurred.
UInt NrError;
} ScannerState;
/**************************************************************************** ** *F SyntaxError( <msg> ) . . . . . . . . . . . . . . . . raise a syntax error *F SyntaxWarning( <msg> ) . . . . . . . . . . . . . . raise a syntax warning ** ** 'SyntaxError' prints the current line, followed by the error message: ** ** ^ syntax error, <msg> in <current file name> ** ** with the '^' pointing to the current symbol on the current line. If the ** <current file name> is '*stdin*' it is not printed. ** ** 'SyntaxError' is called from the parser to print error messages for those ** errors that are not caught by 'Match', for example if the left hand side ** of an assignment is not a variable, a list element or a record component, ** or if two formal arguments of a function have the same identifier. ** ** 'SyntaxError' first increments 'NrError' by 1. If 'NrError' is greater ** than zero the parser functions will not create new bags. This prevents ** the parser from creating new bags after an error occurred. ** ** 'SyntaxError' also records the current line number in 'lastErrorLine' to ** prevent the printing of multiple error messages for one line, since they ** usually just reflect the fact that the parser has not resynchronized ** yet. ** ** 'SyntaxWarning' displays in the same way but does not change 'NrError' ** or 'lastErrorLine'. ** ** Note that unlike 'ErrorQuit', neither function raises an actual error, ** so execution continues as normal. Thus you must make sure that subsequent ** code can safely recover from the indicated error. ** ** Both functions should only be called from the scanner or reader, but not ** from e.g. the interpreter or coder, let alone any other parts of GAP. ** ** The 'WithOffset' variants allow marking a previously parsed token as ** the syntax error. This is used by 'Unbound global variable', as GAP ** does not know if a variable is unbound until another 2 tokens are read. **
*/ void SyntaxErrorWithOffset(ScannerState * s, constChar * msg, Int tokenoffset);
void SyntaxWarningWithOffset(ScannerState * s, constChar * msg, Int tokenoffset);
/**************************************************************************** ** *F Match( <symbol>, <msg>, <skipto> ) . match current symbol and fetch next ** ** 'Match' is the main interface between the scanner and the parser. It ** performs the four most common actions in the scanner with just one call. ** First it checks that the current symbol stored in the variable 'Symbol' ** is the expected symbol as passed in the argument <symbol>. If it is, ** 'Match' reads the next symbol from input and returns. Otherwise 'Match' ** first prints the current input line followed by the syntax error message: ** '^ syntax error, <msg> expected' with '^' pointing to the current symbol. ** It then skips symbols up to one in the resynchronisation set <skipto>. ** Actually 'Match' calls 'SyntaxError' so its comments apply here too. ** ** One kind of typical 'Match' call has the form ** ** 'Match( Symbol, "", 0 );'. ** ** This is used if the parser knows that the current symbol is correct, for ** example in 'ReadReturn' the first symbol must be 'S_RETURN', otherwise ** 'ReadReturn' would not have been called. Called this way 'Match' will of ** course never raise a syntax error, therefore <msg> and <skipto> are of no ** concern. The effect of this call is merely to read the next symbol from ** input. ** ** Another typical 'Match' call is in 'ReadIf' after we read the if symbol ** and the condition following, and now expect to see the 'then' symbol: ** ** Match( S_THEN, "then", STATBEGIN|S_ELIF|S_ELSE|S_FI|follow ); ** ** If the current symbol is 'S_THEN' it is matched and the next symbol is ** read. Otherwise 'Match' prints the current line followed by the error ** message: '^ syntax error, then expected'. 
Then 'Match' skips all symbols ** until finding either a symbol that can begin a statement, an 'elif' or ** 'else' or 'fi' symbol, or a symbol that is contained in the set <follow> ** which is passed to 'ReadIf' and contains all symbols allowing one of the ** calling functions to resynchronize, for example 'S_OD' if 'ReadIf' has ** been called from 'ReadFor'. <follow> always contain 'S_EOF', which 'Read' ** uses to resynchronise. ** ** If 'Match' needs to read a new line from '*stdin*' or '*errin*' to get ** the next symbol it prints the string pointed to by 'Prompt'.
*/ void Match(ScannerState * s,
UInt symbol, constChar * msg,
TypSymbolSet skipto);
/****************************************************************************
**
*F  ScanForFloatAfterDotHACK( <s> )
**
**  This function is called by 'ReadLiteral' if it encounters a single dot
**  in the form of the symbol 'S_DOT'.  The only legal way this could happen
**  is if the dot is the start of a float literal like '.123'.  As the
**  scanner cannot detect this without being context aware, this function is
**  provided so that the reader can signal the situation to the scanner.
*/
void ScanForFloatAfterDotHACK(ScannerState * s);
#endif// GAP_SCANNER_H
/*
**  NOTE: the following is boilerplate from the web page this listing was
**  published on (translated from German); it is not part of the header.
**
**  Processing time: 0.15 seconds (preprocessed)
**
**  The information on this web page has been compiled carefully and to the
**  best of our knowledge.  However, no guarantee is given as to the
**  completeness, correctness, or quality of the information provided.
**
**  Remark: the colored syntax highlighting is still experimental.
*/