/* Check whether a symbol is malformed.

   - length 1: a lone high or low surrogate, or the replacement character U+FFFD
   - length 2: anything other than CR LF or a proper surrogate pair
   - otherwise (including the empty string): anything that does not end with ">",
     as well as the two degenerate forms with an empty name
*/
def is_malformed(s: Symbol): Boolean =
  s.length match {
    case 1 =>
      val c = s(0)
      Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'
    case 2 =>
      val c1 = s(0)
      val c2 = s(1)
      !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))
    case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
  }
/* True exactly for the three line terminators: LF, CR, and the CR LF pair. */
def is_newline(s: Symbol): Boolean =
  s match {
    case "\n" | "\r" | "\r\n" => true
    case _ => false
  }
/* Scanner that determines the symbol starting at a given offset of `text`. */
class Matcher(text: CharSequence) {
  /* offset lies within the text */
  private def ok(i: Int): Boolean = 0 <= i && i < text.length

  /* character at offset i, or the NUL character when i is out of range */
  private def char(i: Int): Char = if (ok(i)) text.charAt(i) else 0

  /* skip over c at offset i, if present */
  private def maybe_char(c: Char, i: Int): Int = if (char(i) == c) i + 1 else i

  @tailrec private def many_ascii_letdig(i: Int): Int =
    if (is_ascii_letdig(char(i))) many_ascii_letdig(i + 1) else i

  /* skip over an identifier (as judged by is_ascii_letter / is_ascii_letdig), if present */
  private def maybe_ascii_id(i: Int): Int =
    if (is_ascii_letter(char(i))) many_ascii_letdig(i + 1) else i

  /* Length of the symbol at offset i:
     2 for a surrogate pair or CR LF; for text starting with backslash and "<" the
     extent of an optional "^", an optional identifier and an optional ">";
     1 for any other character inside the text; 0 outside the text. */
  def match_length(i: Int): Int = {
    val a = char(i)
    val b = char(i + 1)
    if (Character.isHighSurrogate(a) && Character.isLowSurrogate(b) || a == '\r' && b == '\n') 2
    else if (a == '\\' && b == '<') maybe_char('>', maybe_ascii_id(maybe_char('^', i + 2))) - i
    else if (ok(i)) 1
    else 0
  }

  /* The symbol at offset i as a string; empty when i is outside the text. */
  def match_symbol(i: Int): String =
    match_length(i) match {
      case 0 => ""
      case 1 => char_symbol(text.charAt(i))
      case n => text.subSequence(i, i + n).toString
    }
}
/* iterator */
/* Iterate over the symbols of `text`, in order, using a Matcher to delimit each one.
   Calling next() at the end of the text yields the empty string without advancing. */
def iterator(text: CharSequence): Iterator[Symbol] =
  new Iterator[Symbol] {
    private val matcher = new Matcher(text)
    private var i = 0
    def hasNext: Boolean = i < text.length
    def next(): Symbol = {
      val s = matcher.match_symbol(i)
      i += s.length
      s
    }
  }
object Index {
  /* chr: character offset, sym: symbol count -- both taken right after a
     symbol that occupies more than one character */
  private sealed case class Entry(chr: Int, sym: Int)

  val empty: Index = new Index(Nil)

  /* Build the index for `text`: walk it symbol by symbol and record an entry after
     every symbol longer than one character. Text without such symbols shares the
     `empty` index. */
  def apply(text: CharSequence): Index = {
    val matcher = new Matcher(text)
    val buf = new mutable.ListBuffer[Entry]
    var chr = 0
    var sym = 0
    while (chr < text.length) {
      val n = matcher.match_length(chr)
      chr += n
      sym += 1
      if (n > 1) buf += Entry(chr, sym)
    }
    if (buf.isEmpty) empty else new Index(buf.toList)
  }
}
/* Index for translating symbol offsets into character offsets. */
final class Index private(entries: List[Index.Entry]) {
  private val hash: Int = entries.hashCode
  private val index: Array[Index.Entry] = entries.toArray

  /* Translate a symbol offset: search for the last entry whose symbol count does not
     exceed symbol_offset - 1 and shift by that entry's difference between character
     offset and symbol count; without such an entry the result is symbol_offset - 1. */
  def decode(symbol_offset: Offset): Text.Offset = {
    val sym = symbol_offset - 1
    val end = index.length
    @tailrec def bisect(a: Int, b: Int): Int = {
      if (a < b) {
        val c = (a + b) / 2
        if (sym < index(c).sym) bisect(a, c)
        else if (c + 1 == end || sym < index(c + 1).sym) c
        else bisect(c + 1, b)
      }
      else -1
    }
    val i = bisect(0, end)
    if (i < 0) sym
    else index(i).chr + sym - index(i).sym
  }

  def decode(symbol_range: Range): Text.Range = symbol_range.map(decode)

  /* equality is element-wise on the entries; the hash is computed once from the list */
  override def hashCode: Int = hash
  override def equals(that: Any): Boolean =
    that match {
      case other: Index => index.sameElements(other.index)
      case _ => false
    }
}
/* symbolic text chunks -- without actual text */
object Text_Chunk {
  /* chunk names: the default chunk, a chunk given by id, or a chunk given by file name */
  sealed abstract class Name
  case object Default extends Name
  case class Id(id: Document_ID.Generic) extends Name
  case class File(name: String) extends Name

  /* chunk covering the whole of `text`, together with its symbol index */
  def apply(text: CharSequence): Text_Chunk =
    new Text_Chunk(Text.Range.length(text), Index(text))
}
/* A text range paired with its symbol index; equality and hash use both components.

   NOTE(review): in this copy the class body was never closed after `equals`. It is
   closed here so that the definition is well-formed; further members may have been
   lost -- compare with the upstream file. */
final class Text_Chunk private(val range: Text.Range, private val index: Index) {
  override def hashCode: Int = (range, index).hashCode
  override def equals(that: Any): Boolean =
    that match {
      case other: Text_Chunk =>
        range == other.range &&
        index == other.index
      case _ => false
    }
}
/* Replaces symbols of a text according to a list of (source, target) pairs.
   The first element of each pair must be non-empty, since its first character is
   inspected; two pairs with the same source are rejected via error(...). */
private class Recoder(list: List[(String, String)]) {
  /* smallest and largest first character over all sources: a cheap filter for
     characters that cannot start any mapped symbol */
  private val (min, max) = {
    var min = '\uffff'
    var max = '\u0000'
    for ((x, _) <- list) {
      val c = x(0)
      if (c < min) min = c
      if (c > max) max = c
    }
    (min, max)
  }

  private val table = {
    var tab = Map[String, String]()
    for ((x, y) <- list) {
      tab.get(x) match {
        case None => tab += (x -> y)
        case Some(z) =>
          error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
      }
    }
    tab
  }

  /* Recode `text`; the very same string is returned when no character falls into
     the min..max window. */
  def recode(text: String): String = {
    val n = text.length
    val relevant = {
      var i = 0
      var found = false
      while (i < n && !found) {
        val c = text(i)
        if (min <= c && c <= max) { found = true }
        i += 1
      }
      found
    }
    if (relevant) {
      val matcher = new Symbol.Matcher(text)
      Library.string_builder(hint = n) { result =>
        var i = 0
        while (i < n) {
          val c = text(i)
          if (min <= c && c <= max) {
            // candidate start: take the whole symbol and map it, if it is in the table
            val s = matcher.match_symbol(i)
            result.append(table.getOrElse(s, s))
            i += s.length
          }
          else { result.append(c); i += 1 }
        }
      }
    }
    else text
  }
}
/* Possible values of the `argument` field of a symbol entry. */
enum Argument {
  case none
  case cartouche
  case space_cartouche
}
/* Construction of symbol entries from a symbol and its properties.

   NOTE(review): the vals from `name` down to `new Entry(...)` use `symbol`, `props`
   and `err`, none of which is declared in the visible text, and the braces below close
   one more scope than is opened. Presumably the header of an enclosing factory method
   (and the `err` helper) was lost in this copy, and keywords such as `privateval` are
   fused -- restore from the upstream file before compiling. */
object Entry { privateval Name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""") privateval Argument = new Properties.String("argument") privateval Abbrev = new Properties.String("abbrev") privateval Code = new Properties.String("code") privateval Font = new Properties.String("font") privateval Group = new Properties.String("group")
// the name is the identifier captured by the Name pattern
val name =
symbol match { case Name(a) => a case _ => err("Cannot determine name") }
// the "argument" property, defaulting to Symbol.Argument.none when absent
val argument =
props match { case Argument(arg) =>
Symbol.Argument.unapply(arg) getOrElse
error("Bad argument: " + quote(arg) + " for symbol " + quote(symbol)) case _ => Symbol.Argument.none
}
// the optional "code" property: parsed by Integer.decode, values below 128 are rejected
val code =
props match { case Code(s) => try { val code = Integer.decode(s).intValue if (code >= 128) Some(code) else err("Illegal ASCII code")
} catch { case _: NumberFormatException => err("Bad code") } case _ => None
}
// all "group" properties, or List("unsorted") when proper_list yields nothing
val groups =
proper_list(for (case (Group.name, a) <- props) yield a).getOrElse(List("unsorted"))
val abbrevs = for (case (Abbrev.name, a) <- props) yield a
new Entry(symbol, name, argument, code, Font.unapply(props), groups, abbrevs)
}
}
/* One symbol together with its name, argument kind, optional code point, optional
   font, groups and abbreviations. Prints as the symbol itself. */
class Entry private(
  val symbol: Symbol,
  val name: String,
  val argument: Symbol.Argument,
  val code: Option[Int],
  val font: Option[String],
  val groups: List[String],
  val abbrevs: List[String]
) {
  override def toString: String = symbol

  /* the string for the code point `code`, if there is one */
  val decode: Option[String] =
    code.map(c => new String(Character.toChars(c)))
}
/* Map containing every given pair plus, for each pair, the same value under the
   decoded form of its key; on key clashes the decoded pairs win, as they come later. */
private def recode_map[A](elems: Iterable[(String, A)]): Map[String, A] =
  (elems.iterator ++ elems.iterator.map({ case (sym, a) => (decode(sym), a) })).toMap
/* user fonts */
/* font of each entry that declares one, keyed by the symbol (recode_map adds the
   decoded form of each key) */
val fonts: Map[Symbol, String] =
  recode_map(for (entry <- entries; font <- entry.font) yield entry.symbol -> font)

/* distinct font names, and the position of each name within that list */
val font_names: List[String] = fonts.iterator.map(_._2).toSet.toList
val font_index: Map[String, Int] = (font_names zip font_names.indices.toList).toMap

/* symbols of all entries accepted by raw_symbolic, passed through recode_set */
val symbolic: Set[String] =
  recode_set(for (entry <- entries if raw_symbolic(entry.symbol)) yield entry.symbol)
/* misc symbols */
/* decoded forms of the corresponding symbol constants, computed once */
val newline_decoded: Symbol = decode(newline)
val comment_decoded: Symbol = decode(comment)
val cancel_decoded: Symbol = decode(cancel)
val latex_decoded: Symbol = decode(latex)
val marker_decoded: Symbol = decode(marker)
val open_decoded: Symbol = decode(open)
val close_decoded: Symbol = decode(close)
/* brackets */
val open_brackets_decoded = decode(open_brackets)
val close_brackets_decoded = decode(close_brackets)
/* control symbols */
/* decoded strings of all entries whose symbol starts with the control prefix and
   that carry a code */
val control_decoded: Set[Symbol] =
  (for (entry <- entries.iterator if entry.symbol.startsWith("\\<^"); s <- entry.decode) yield s).toSet

/* decoded forms of the individual control symbols, computed once */
val sub_decoded: Symbol = decode(sub)
val sup_decoded: Symbol = decode(sup)
val bold_decoded: Symbol = decode(bold)
val emph_decoded: Symbol = decode(emph)
val bsub_decoded: Symbol = decode(bsub)
val esub_decoded: Symbol = decode(esub)
val bsup_decoded: Symbol = decode(bsup)
val esup_decoded: Symbol = decode(esup)
}
/* Decode `text`, insisting that encoding the result gives back `text` exactly.
   Otherwise fail via error(...), listing each symbol of `text` that does not survive
   the decode/encode round trip (each one once, in order of first occurrence). */
def decode_strict(text: String): String = {
  val decoded = decode(text)
  if (encode(decoded) == text) decoded
  else {
    val bad = new mutable.ListBuffer[Symbol]
    for (s <- iterator(text) if encode(decode(s)) != s && !bad.contains(s))
      bad += s
    error("Bad Unicode symbols in text: " + commas_quote(bad))
  }
}
/* NOTE(review): in this copy the literal was a bare backslash; the angle-bracket part
   appears to have been stripped as markup. Restored following the shape of the
   control symbols such as "\\<^sub>" -- confirm against the upstream file. */
val newline: Symbol = "\\<newline>"
def newline_decoded: Symbol = symbols.newline_decoded
/* Replace every line-feed symbol of `str` by the decoded newline symbol; a string
   without any line feed is returned as it is. */
def print_newlines(str: String): String = {
  val has_newline = str.contains('\n')
  if (!has_newline) str
  else iterator(str).map(s => if (s == "\n") newline_decoded else s).mkString
}
/* formal comments */
/* NOTE(review): the `comment` literal was a bare backslash in this copy (its
   angle-bracket part apparently stripped as markup); restored by analogy with its
   siblings below -- confirm against the upstream file. */
val comment: Symbol = "\\<comment>"
val cancel: Symbol = "\\<^cancel>"
val latex: Symbol = "\\<^latex>"
val marker: Symbol = "\\<^marker>"

def comment_decoded: Symbol = symbols.comment_decoded
def cancel_decoded: Symbol = symbols.cancel_decoded
def latex_decoded: Symbol = symbols.latex_decoded
def marker_decoded: Symbol = symbols.marker_decoded
/* cartouches */
/* NOTE(review): both literals were the same bare backslash in this copy, which cannot
   distinguish the opening from the closing delimiter; the angle-bracket parts were
   apparently stripped as markup. Restored from the value names -- confirm against the
   upstream file. */
val open: Symbol = "\\<open>"
val close: Symbol = "\\<close>"

def open_decoded: Symbol = symbols.open_decoded
def close_decoded: Symbol = symbols.close_decoded
/* control symbols and accessors for their decoded forms (held by `symbols`) */
val sub: Symbol = "\\<^sub>"
val sup: Symbol = "\\<^sup>"
val bold: Symbol = "\\<^bold>"
val emph: Symbol = "\\<^emph>"
val bsub: Symbol = "\\<^bsub>"
val esub: Symbol = "\\<^esub>"
val bsup: Symbol = "\\<^bsup>"
val esup: Symbol = "\\<^esup>"

def sub_decoded: Symbol = symbols.sub_decoded
def sup_decoded: Symbol = symbols.sup_decoded
def bold_decoded: Symbol = symbols.bold_decoded
def emph_decoded: Symbol = symbols.emph_decoded
def bsub_decoded: Symbol = symbols.bsub_decoded
def esub_decoded: Symbol = symbols.esub_decoded
def bsup_decoded: Symbol = symbols.bsup_decoded
def esup_decoded: Symbol = symbols.esup_decoded
/* metric */
/* ASCII symbols are judged by is_ascii_printable on their first character;
   every other symbol is printable unless is_control holds for it. */
def is_printable(sym: Symbol): Boolean = {
  val ascii = is_ascii(sym)
  if (!ascii) !is_control(sym)
  else is_ascii_printable(sym(0))
}
/* Metric with unit 1.0: the width of a string is the sum of per-symbol widths
   (4, 3, 2, 1 or 0), selected by prefix tests on the encoded symbol; a symbol that
   is blank or printable counts 1, anything else 0.

   NOTE(review): the four prefix literals passed to startsWith look truncated in this
   copy (each stops right after the escaped backslash), and the else-if keywords and
   the two member definitions are fused. The prefixes cannot be recovered from this
   text -- restore them from the upstream file before compiling. */
object Metric extends Pretty.Metric { val unit = 1.0 def apply(str: String): Double =
(for (s <- iterator(str)) yield { val sym = encode(s) if (sym.startsWith("\\)) 4 elseif (sym.startsWith("\\)) 3 elseif (sym.startsWith("\\) || sym.startsWith("\\)) 2 elseif (is_blank(sym) || is_printable(sym)) 1 else 0
}).sum
}
}
Messung V0.5
¤ Dauer der Verarbeitung: 0.11 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.