1 /* Title: Pure/General/symbol.scala
2 Author: Makarius
3
4 Detecting and recoding Isabelle symbols.
5 */
6
7 package isabelle
8
9 import scala.io.Source
10 import scala.collection.mutable
11 import scala.util.matching.Regex
12
13
14 object Symbol
15 {
16 type Symbol = String
17
18
19 /* spaces */
20
21 val spc = ' '
22 val space = " "
23
24 private val static_spaces = space * 4000
25
26 def spaces(k: Int): String =
27 {
28 require(k >= 0)
29 if (k < static_spaces.length) static_spaces.substring(0, k)
30 else space * k
31 }
32
33
34 /* ASCII characters */
35
36 def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
37 def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'
38 def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''
39
40 def is_ascii_letdig(c: Char): Boolean =
41 is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
42
43 def is_ascii_identifier(s: String): Boolean =
44 s.length > 0 && is_ascii_letter(s(0)) && s.substring(1).forall(is_ascii_letdig)
45
46
47 /* Symbol regexps */
48
49 private val plain = new Regex("""(?xs)
50 [^\r\\\ud800-\udfff\ufffd] | [\ud800-\udbff][\udc00-\udfff] """)
51
52 private val physical_newline = new Regex("""(?xs) \n | \r\n | \r """)
53
54 private val symbol = new Regex("""(?xs)
55 \\ < (?:
56 \^? [A-Za-z][A-Za-z0-9_']* |
57 \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
58
59 private val malformed_symbol = new Regex("(?xs) (?!" + symbol + ")" +
60 """ [\ud800-\udbff\ufffd] | \\<\^? """)
61
62 val regex_total =
63 new Regex(plain + "|" + physical_newline + "|" + symbol + "|" + malformed_symbol + "| .")
64
65
66 /* basic matching */
67
68 def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff')
69
70 def is_physical_newline(s: Symbol): Boolean =
71 s == "\n" || s == "\r" || s == "\r\n"
72
73 def is_malformed(s: Symbol): Boolean =
74 !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches
75
76 class Matcher(text: CharSequence)
77 {
78 private val matcher = regex_total.pattern.matcher(text)
79 def apply(start: Int, end: Int): Int =
80 {
81 require(0 <= start && start < end && end <= text.length)
82 if (is_plain(text.charAt(start))) 1
83 else {
84 matcher.region(start, end).lookingAt
85 matcher.group.length
86 }
87 }
88 }
89
90
91 /* iterator */
92
93 private val char_symbols: Array[Symbol] =
94 (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
95
96 def iterator(text: CharSequence): Iterator[Symbol] =
97 new Iterator[Symbol]
98 {
99 private val matcher = new Matcher(text)
100 private var i = 0
101 def hasNext = i < text.length
102 def next =
103 {
104 val n = matcher(i, text.length)
105 val s =
106 if (n == 0) ""
107 else if (n == 1) {
108 val c = text.charAt(i)
109 if (c < char_symbols.length) char_symbols(c)
110 else text.subSequence(i, i + n).toString
111 }
112 else text.subSequence(i, i + n).toString
113 i += n
114 s
115 }
116 }
117
118 def explode(text: CharSequence): List[Symbol] = iterator(text).toList
119
120
121 /* decoding offsets */
122
123 class Index(text: CharSequence)
124 {
125 sealed case class Entry(chr: Int, sym: Int)
126 val index: Array[Entry] =
127 {
128 val matcher = new Matcher(text)
129 val buf = new mutable.ArrayBuffer[Entry]
130 var chr = 0
131 var sym = 0
132 while (chr < text.length) {
133 val n = matcher(chr, text.length)
134 chr += n
135 sym += 1
136 if (n > 1) buf += Entry(chr, sym)
137 }
138 buf.toArray
139 }
140 def decode(sym1: Int): Int =
141 {
142 val sym = sym1 - 1
143 val end = index.length
144 def bisect(a: Int, b: Int): Int =
145 {
146 if (a < b) {
147 val c = (a + b) / 2
148 if (sym < index(c).sym) bisect(a, c)
149 else if (c + 1 == end || sym < index(c + 1).sym) c
150 else bisect(c + 1, b)
151 }
152 else -1
153 }
154 val i = bisect(0, end)
155 if (i < 0) sym
156 else index(i).chr + sym - index(i).sym
157 }
158 def decode(range: Text.Range): Text.Range = range.map(decode(_))
159 }
160
161
162 /* recoding text */
163
164 private class Recoder(list: List[(String, String)])
165 {
166 private val (min, max) =
167 {
168 var min = '\uffff'
169 var max = '\u0000'
170 for ((x, _) <- list) {
171 val c = x(0)
172 if (c < min) min = c
173 if (c > max) max = c
174 }
175 (min, max)
176 }
177 private val table =
178 {
179 var tab = Map[String, String]()
180 for ((x, y) <- list) {
181 tab.get(x) match {
182 case None => tab += (x -> y)
183 case Some(z) =>
184 error("Duplicate mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
185 }
186 }
187 tab
188 }
189 def recode(text: String): String =
190 {
191 val len = text.length
192 val matcher = regex_total.pattern.matcher(text)
193 val result = new StringBuilder(len)
194 var i = 0
195 while (i < len) {
196 val c = text(i)
197 if (min <= c && c <= max) {
198 matcher.region(i, len).lookingAt
199 val x = matcher.group
200 result.append(table.get(x) getOrElse x)
201 i = matcher.end
202 }
203 else { result.append(c); i += 1 }
204 }
205 result.toString
206 }
207 }
208
209
210
211 /** symbol interpretation **/
212
213 private lazy val symbols =
214 new Interpretation(
215 Isabelle_System.try_read(Path.split(Isabelle_System.getenv_strict("ISABELLE_SYMBOLS"))))
216
217 private class Interpretation(symbols_spec: String)
218 {
219 /* read symbols */
220
221 private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
222 private val key = new Regex("""(?xs) (.+): """)
223
224 private def read_decl(decl: String): (Symbol, Map[String, String]) =
225 {
226 def err() = error("Bad symbol declaration: " + decl)
227
228 def read_props(props: List[String]): Map[String, String] =
229 {
230 props match {
231 case Nil => Map()
232 case _ :: Nil => err()
233 case key(x) :: y :: rest => read_props(rest) + (x -> y)
234 case _ => err()
235 }
236 }
237 decl.split("\\s+").toList match {
238 case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))
239 case _ => err()
240 }
241 }
242
243 private val symbols: List[(Symbol, Map[String, String])] =
244 Map((
245 for (decl <- split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches)
246 yield read_decl(decl)): _*) toList
247
248
249 /* misc properties */
250
251 val names: Map[Symbol, String] =
252 {
253 val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
254 Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*)
255 }
256
257 val abbrevs: Map[Symbol, String] =
258 Map((
259 for ((sym, props) <- symbols if props.isDefinedAt("abbrev"))
260 yield (sym -> props("abbrev"))): _*)
261
262
263 /* recoding */
264
265 private val (decoder, encoder) =
266 {
267 val mapping =
268 for {
269 (sym, props) <- symbols
270 val code =
271 try { Integer.decode(props("code")).intValue }
272 catch {
273 case _: NoSuchElementException => error("Missing code for symbol " + sym)
274 case _: NumberFormatException => error("Bad code for symbol " + sym)
275 }
276 val ch = new String(Character.toChars(code))
277 } yield {
278 if (code < 128) error("Illegal ASCII code for symbol " + sym)
279 else (sym, ch)
280 }
281 (new Recoder(mapping),
282 new Recoder(mapping map { case (x, y) => (y, x) }))
283 }
284
285 def decode(text: String): String = decoder.recode(text)
286 def encode(text: String): String = encoder.recode(text)
287
288 private def recode_set(elems: String*): Set[String] =
289 {
290 val content = elems.toList
291 Set((content ::: content.map(decode)): _*)
292 }
293
294 private def recode_map[A](elems: (String, A)*): Map[String, A] =
295 {
296 val content = elems.toList
297 Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*)
298 }
299
300
301 /* user fonts */
302
303 val fonts: Map[Symbol, String] =
304 recode_map((
305 for ((sym, props) <- symbols if props.isDefinedAt("font"))
306 yield (sym -> props("font"))): _*)
307
308 val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList
309 val font_index: Map[String, Int] = Map((font_names zip (0 until font_names.length).toList): _*)
310
311
312 /* classification */
313
314 val letters = recode_set(
315 "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
316 "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
317 "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
318 "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
319
320 "\\", "\\", "\\", "\\", "\\", "\\", "\\",
321 "\\", "\\", "\\", "\\", "\\", "\\", "\\",
322 "\\", "\\" , "\\" , "\\", "\\", "\\", "\\",
323 "\\", "\\", "\\", "\\", "\\", "\\", "\\",
324 "\\", "\\", "\\", "\\", "\\", "\\", "\\",
325 "\\", "\\", "\\", "\\", "\\", "\\", "\\" ,
326 "\\" , "\\", "\\", "\\", "\\", "\\", "\\",
327 "\\", "\\", "\\",
328
329 "\\", "\\", "\\", "\\", "\\", "\\",
330 "\\", "\\", "\\", "\\", "\\", "\\",
331 "\\", "\\", "\\", "\\", "\\", "\\",
332 "\\", "\\", "\\", "\\", "\\", "\\",
333 "\\", "\\", "\\", "\\", "\\", "\\",
334 "\\", "\\", "\\", "\\", "\\", "\\",
335 "\\", "\\", "\\", "\\", "\\", "\\",
336 "\\", "\\", "\\", "\\", "\\", "\\",
337 "\\", "\\", "\\", "\\",
338
339 "\\", "\\", "\\", "\\", "\\",
340 "\\", "\\", "\\", "\\", "\\",
341 "\\", "\\", "\\", "\\", "\\", "\\",
342 "\\", "\\", "\\", "\\", "\\",
343 "\\", "\\", "\\", "\\", "\\",
344 "\\", "\\", "\\", "\\", "\\",
345 "\\", "\\",
346
347 "\\<^isub>", "\\<^isup>")
348
349 val blanks =
350 recode_set(space, "\t", "\n", "\u000B", "\f", "\r", "\\", "\\<^newline>")
351
352 val sym_chars =
353 Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
354
355 val symbolic = recode_set((for { (sym, _) <- symbols; if raw_symbolic(sym) } yield sym): _*)
356
357
358 /* control symbols */
359
360 val ctrl_decoded: Set[Symbol] =
361 Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*)
362
363 val sub_decoded = decode("\\<^sub>")
364 val sup_decoded = decode("\\<^sup>")
365 val isub_decoded = decode("\\<^isub>")
366 val isup_decoded = decode("\\<^isup>")
367 val bsub_decoded = decode("\\<^bsub>")
368 val esub_decoded = decode("\\<^esub>")
369 val bsup_decoded = decode("\\<^bsup>")
370 val esup_decoded = decode("\\<^esup>")
371 val bold_decoded = decode("\\<^bold>")
372 }
373
374
375 /* tables */
376
377 def names: Map[Symbol, String] = symbols.names
378 def abbrevs: Map[Symbol, String] = symbols.abbrevs
379
380 def decode(text: String): String = symbols.decode(text)
381 def encode(text: String): String = symbols.encode(text)
382
383 def fonts: Map[Symbol, String] = symbols.fonts
384 def font_names: List[String] = symbols.font_names
385 def font_index: Map[String, Int] = symbols.font_index
386 def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_))
387
388
389 /* classification */
390
391 def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym)
392 def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9'
393 def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'"
394 def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)
395 def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym)
396
397 def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)
398 def is_symbolic(sym: Symbol): Boolean = raw_symbolic(sym) || symbols.symbolic.contains(sym)
399
400 private def raw_symbolic(sym: Symbol): Boolean =
401 sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
402
403
404
405
406 /* control symbols */
407
408 def is_ctrl(sym: Symbol): Boolean =
409 sym.startsWith("\\<^") || symbols.ctrl_decoded.contains(sym)
410
411 def is_controllable(sym: Symbol): Boolean =
412 !is_blank(sym) && !is_ctrl(sym) && !is_malformed(sym)
413
414 def sub_decoded: Symbol = symbols.sub_decoded
415 def sup_decoded: Symbol = symbols.sup_decoded
416 def isub_decoded: Symbol = symbols.isub_decoded
417 def isup_decoded: Symbol = symbols.isup_decoded
418 def bsub_decoded: Symbol = symbols.bsub_decoded
419 def esub_decoded: Symbol = symbols.esub_decoded
420 def bsup_decoded: Symbol = symbols.bsup_decoded
421 def esup_decoded: Symbol = symbols.esup_decoded
422 def bold_decoded: Symbol = symbols.bold_decoded
423 }
¤ Dauer der Verarbeitung: 0.19 Sekunden
(vorverarbeitet)
¤
|
Haftungshinweis
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.
|