(* Title: Pure/Syntax/lexicon.ML Author: Tobias Nipkow and Markus Wenzel, TU Muenchen
Lexer for the inner Isabelle syntax (terms and types).
*)
signature LEXICON = sig structure Syntax: sig valconst: string -> term val free: string -> term val var: indexname -> term end val scan_id: Symbol_Pos.T list scanner val scan_longid: Symbol_Pos.T list scanner val scan_tid: Symbol_Pos.T list scanner val scan_hex: Symbol_Pos.T list scanner val scan_bin: Symbol_Pos.T list scanner val scan_var: Symbol_Pos.T list scanner val scan_tvar: Symbol_Pos.T list scanner val is_tid: string -> bool datatype token_kind =
Literal | Ident | Long_Ident | Var | Type_Ident | Type_Var | Num | Float | Str | String | Cartouche | Space | Comment of Comment.kind | Dummy | EOF
eqtype token val kind_of_token: token -> token_kind val str_of_token: token -> string val range_of_token: token -> Position.range val pos_of_token: token -> Position.T val end_pos_of_token: token -> Position.T val is_proper: token -> bool val dummy: token val is_dummy: token -> bool val literal: string -> token val is_literal: token -> bool val token_ord: token ord val token_content_ord: token ord val token_type_ord: token ord val mk_eof: Position.T -> token val eof: token val is_eof: token -> bool val stopper: token Scan.stopper val terminals: stringlist val is_terminal: string -> bool val get_terminal: string -> token option val literal_markup: string -> Markup.T list val reports_of_token: token -> Position.report list val reported_token_range: Proof.context -> token -> string val valued_token: token -> bool val implode_string: Symbol.symbol list -> string val explode_string: string * Position.T -> Symbol_Pos.T list val implode_str: Symbol.symbol list -> string val explode_str: string * Position.T -> Symbol_Pos.T list val tokenize: Scan.lexicon -> {raw: bool} -> Symbol_Pos.T list -> token list val read_indexname: string -> indexname val read_var: string -> term val read_variable: string -> indexname option val read_nat: string -> int option val read_int: string -> int option val read_num: string -> {radix: int, leading_zeros: int, value: int} val read_float: string -> {mant: int, exp: int} val mark_syntax: string -> string val mark_binder: string -> string val mark_indexed: string -> string val mark_class: string -> stringval unmark_class: string -> stringval is_class: string -> bool val mark_type: string -> stringval unmark_type: string -> stringval is_type: string -> bool val mark_const: string -> stringval unmark_const: string -> stringval is_const: string -> bool val mark_fixed: string -> stringval unmark_fixed: string -> stringval is_fixed: string -> bool val unmark_entity:
{case_class: string -> 'a,
case_type: string -> 'a,
case_const: string -> 'a,
case_fixed: string -> 'a,
case_default: string -> 'a} -> string -> 'a val is_marked_entity: string -> bool val dummy_type: term val fun_type: term end;
structure Lexicon: LEXICON = struct
(** syntactic terms **)
structure Syntax = struct
funconst c = Const (c, dummyT); fun free x = Free (x, dummyT); fun var xi = Var (xi, dummyT);
val token_kind = Vector.nth token_kinds; fun token_kind_index k = #1 (the (Vector.findi (fn (_, k') => k = k') token_kinds));
(* datatype token *)
datatype token = Token of int * string * Position.range;
fun index_of_token (Token (i, _, _)) = i; val kind_of_token = index_of_token #> token_kind; fun str_of_token (Token (_, s, _)) = s; fun range_of_token (Token (_, _, r)) = r; val pos_of_token = #1 o range_of_token; val end_pos_of_token = #1 o range_of_token;
val is_proper = kind_of_token #> (fn Space => false | Comment _ => false | _ => true);
val dummy_index = token_kind_index Dummy; val dummy = Token (dummy_index, "", Position.no_range); fun is_dummy tok = index_of_token tok = dummy_index;
(* literals *)
val literal_index = token_kind_index Literal; fun literal s = Token (literal_index, s, Position.no_range); fun is_literal tok = index_of_token tok = literal_index;
(* order *)
fun token_ord (Token (i, s, r), Token (i', s', r')) =
(case int_ord (i, i') of
EQUAL =>
(case fast_string_ord (s, s') of
EQUAL => Position.range_ord (r, r')
| ord => ord)
| ord => ord);
fun token_content_ord (Token (i, s, _), Token (i', s', _)) =
(case int_ord (i, i') of
EQUAL => fast_string_ord (s, s')
| ord => ord);
fun token_type_ord toks = letval is = apply2 index_of_token toks in
(case int_ord is of
EQUAL => if #1 is = literal_index then fast_string_ord (apply2 str_of_token toks) else EQUAL
| ord => ord) end;
(* stopper *)
val eof_index = token_kind_index EOF; fun mk_eof pos = Token (eof_index, "", (pos, Position.none)); val eof = mk_eof Position.none;
fun is_eof tok = index_of_token tok = eof_index; val stopper = Scan.stopper (K eof) is_eof;
val terminals = Symtab.keys terminal_symbols; val is_terminal = Symtab.defined terminal_symbols; fun get_terminal s =
(case Symtab.lookup terminal_symbols s of
SOME i => SOME (Token (i, s, Position.no_range))
| NONE => NONE);
(* markup *)
fun literal_markup s = letval syms = Symbol.explode s in if Symbol.has_control_block syms then [] elseif Symbol.is_ascii_identifier s orelse exists Symbol.is_letter syms then [Markup.literal] else [Markup.delimiter] end;
fun reports_of_token tok = let val pos = pos_of_token tok; val markups = if is_literal tok then literal_markup (str_of_token tok) else token_kind_markup (kind_of_token tok); inmap (pair pos) markups end;
fun reported_token_range ctxt tok = if is_proper tok then Context_Position.reported_text ctxt (pos_of_token tok) Markup.token_range "" else"";
(* valued_token *)
fun valued_token tok = not (is_literal tok orelse is_eof tok);
(** string literals **)
fun explode_literal scan_body (str, pos) =
(case Scan.read Symbol_Pos.stopper scan_body (Symbol_Pos.explode (str, pos)) of
SOME ss => ss
| _ => error (err_prefix ^ "malformed string literal " ^ quote str ^ Position.here pos));
(* string *)
val scan_string = Scan.trace (Symbol_Pos.scan_string_qq err_prefix) >> #2; val scan_string_body = Symbol_Pos.scan_string_qq err_prefix >> (#1 o #2);
fun implode_string ss = quote (implode (map (fn "\"" => "\\\"" | s => s) ss)); val explode_string = explode_literal scan_string_body;
(* str *)
val scan_chr =
$$ "\\" |-- $$$ "'" ||
Scan.one
((fn s => s <> "\\" andalso s <> "'" andalso Symbol.not_eof s) o
Symbol_Pos.symbol) >> single ||
$$$ "'" --| Scan.ahead (~$$ "'");
fun read_indexname s =
(case Scan.read Symbol_Pos.stopper scan_indexname (Symbol_Pos.explode0 s) of
SOME xi => xi
| _ => error ("Lexical error in variable name: " ^ quote s));
(* read_var *)
fun read_var str = let val scan =
$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol_Pos.is_eof)
>> Syntax.var ||
Scan.many (Symbol.not_eof o Symbol_Pos.symbol)
>> (Syntax.free o implode o map Symbol_Pos.symbol); in the (Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode0 str)) end;
(* read_variable *)
fun read_variable str = letval scan = $$ "?" |-- scan_indexname || scan_indexname in Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode0 str) end;
(* read numbers *)
local
fun nat cs = Option.map (#1 o Library.read_int o map Symbol_Pos.symbol)
(Scan.read Symbol_Pos.stopper Symbol_Pos.scan_nat cs);
in
fun read_nat s = nat (Symbol_Pos.explode0 s);
fun read_int s =
(case Symbol_Pos.explode0 s of
("-", _) :: cs => Option.map ~ (nat cs)
| cs => nat cs);
end;
(* read_num: hex/bin/decimal *)
local
val ten = Char.ord #"0" + 10; val a = Char.ord #"a"; val A = Char.ord #"A"; val _ = a > A orelse raise Fail "Bad ASCII";
fun remap_hex c = letval x = ord c in if x >= a thenchr (x - a + ten) elseif x >= A thenchr (x - A + ten) else c end;
fun read_num str = let val (radix, digs) =
(case Symbol.explode str of "0" :: "x" :: cs => (16, map remap_hex cs)
| "0" :: "b" :: cs => (2, cs)
| cs => (10, cs)); in
{radix = radix,
leading_zeros = leading_zeros digs,
value = #1 (Library.read_radix_int radix digs)} end;
end;
fun read_float str = let val cs = Symbol.explode str; val (intpart, fracpart) =
(case chop_prefix Symbol.is_digit cs of
(intpart, "." :: fracpart) => (intpart, fracpart)
| _ => raise Fail "read_float"); in
{mant = #1 (Library.read_int (intpart @ fracpart)),
exp = length fracpart} end;
(** marked names **)
fun marker s = (prefix s, unprefix s, String.isPrefix s);
(* syntax consts *)
val (mark_syntax, _, _) = marker "\<^syntax>"; val (mark_binder, _, _) = marker "\<^binder>"; val (mark_indexed, _, _) = marker "\<^indexed>";
(* logical entities *)
val (mark_class, unmark_class, is_class) = marker "\<^class>"; val (mark_type, unmark_type, is_type) = marker "\<^type>"; val (mark_const, unmark_const, is_const) = marker "\<^const>"; val (mark_fixed, unmark_fixed, is_fixed) = marker "\<^fixed>";
fun unmark_entity {case_class, case_type, case_const, case_fixed, case_default} s =
(casetry unmark_class s of
SOME c => case_class c
| NONE =>
(casetry unmark_type s of
SOME c => case_type c
| NONE =>
(casetry unmark_const s of
SOME c => case_const c
| NONE =>
(casetry unmark_fixed s of
SOME c => case_fixed c
| NONE => case_default s))));
fun is_marked_entity s =
is_class s orelse is_type s orelse is_const s orelse is_fixed s;
val dummy_type = Syntax.const (mark_type "dummy"); val fun_type = Syntax.const (mark_type "fun");
(* toplevel pretty printing *)
val _ =
ML_system_pp (fn _ => fn _ =>
Pretty.to_ML o Pretty.str_list "{""}" o map quote o Scan.dest_lexicon);
end;
¤ Dauer der Verarbeitung: 0.18 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.