#*****************************************************************************
#
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use:
# http://www.unicode.org/copyright.html
#
#*****************************************************************************
#*****************************************************************************
#
# Copyright (C) 2002-2016, International Business Machines Corporation and others.
# All Rights Reserved.
#
#*****************************************************************************
#
# file: rbbirpt.txt
# ICU Break Iterator Rule Parser State Table
#
# This state table is used when reading and parsing a set of RBBI rules
# The rule parser uses a state machine; the data in this file define the
# state transitions that occur for each input character.
#
# *** This file defines the RBBI rule grammar. This is it.
# *** The determination of what is accepted is here.
#
# This file is processed by a perl script "rbbicst.pl" to produce initialized C arrays
# that are then built with the rule parser.
#
# perl rbbicst.pl < rbbirpt.txt > rbbirpt.h
#
# Here is the syntax of the state definitions in this file:
#
#
#StateName:
#   input-char           n next-state           ^push-state     action
#   input-char           n next-state           ^push-state     action
#       |                |   |                      |             |
#       |                |   |                      |             |--- action to be performed by state machine
#       |                |   |                      |                  See function RBBIRuleScanner::doParseActions()
#       |                |   |                      |
#       |                |   |                      |--- Push this named state onto the state stack.
#       |                |   |                           Later, when next state is specified as "pop",
#       |                |   |                           the pushed state will become the current state.
#       |                |   |
#       |                |   |--- Transition to this state if the current input character matches the input
#       |                |        character or char class in the left hand column.  "pop" causes the next
#       |                |        state to be popped from the state stack.
#       |                |
#       |                |--- When making the state transition specified on this line, advance to the next
#       |                     character from the input only if 'n' appears here.
#       |
#       |--- Character or named character classes to test for.  If the current character being scanned
#            matches, perform the actions and go to the state specified on this line.
#            The input character is tested sequentially, in the order written.  The characters and
#            character classes tested for do not need to be mutually exclusive.  The first match wins.
#
#
#
# start state, scan position is at the beginning of the rules file, or in between two rules
#
# Rows are tested top to bottom; the first matching row wins.
# An empty rule (a bare ';') is ignored and scanning stays in 'start'.
start:
    escaped                term                  ^break-rule-end    doExprStart
    white_space          n start
    '^'                  n start-after-caret     ^break-rule-end    doNoChain
    '$'                    scan-var-name         ^assign-or-rule    doExprStart
    '!'                  n rev-option
    ';'                  n start
    eof                    exit
    default                term                  ^break-rule-end    doExprStart
#
# break-rule-end: Returned from doing a break-rule expression.
#
# A rule must be terminated by ';' (optionally preceded by white space);
# any other character here is a rule syntax error.
break-rule-end:
    ';'                  n start                                    doEndOfRule
    white_space          n break-rule-end
    default                errorDeath                               doRuleError
#
# start of a rule, after having seen a '^' (inhibits rule chain in).
# Similar to the main 'start' state in most respects, except
# - empty rule is an error.
# - A second '^' is an error.
#
# Same shape as 'start', but a second '^', an empty rule (';') and
# end of input are all errors here.
start-after-caret:
    escaped                term                                     doExprStart
    white_space          n start-after-caret
    '^'                    errorDeath                               doRuleError
    '$'                    scan-var-name         ^term-var-ref      doExprStart
    ';'                    errorDeath                               doRuleError
    eof                    errorDeath                               doRuleError
    default                term                                     doExprStart
#
#   !               We've just scanned a '!', indicating either a !!key word flag or a
#                   !Reverse rule.
#
# A second '!' introduces a "!!option" keyword; anything else begins a
# reverse rule (the character is not consumed).
rev-option:
    '!'                  n option-scan1
    default                reverse-rule           ^break-rule-end   doReverseDir
# First character of an option keyword: it must be a name_start_char,
# which is consumed; any other character is an error.
option-scan1:
    name_start_char      n option-scan2                             doOptionStart
    default                errorDeath                               doRuleError
# Remaining characters of the option keyword: consume name_char input,
# and move on (without consuming) at the first non-name character.
option-scan2:
    name_char            n option-scan2
    default                option-scan3                             doOptionEnd
# After the option keyword: skip white space and require the closing ';'.
# Any other character is a rule syntax error.
option-scan3:
    ';'                  n start
    white_space          n option-scan3
    default                errorDeath                               doRuleError
# Unconditionally begin scanning a term; 'break-rule-end' is pushed as the
# state to return to when the expression is finished.
reverse-rule:
    default                term                   ^break-rule-end   doExprStart
#
#  term.  Eat through a single rule character, or a composite thing, which
#         could be a parenthesized expression, a variable name, or a Unicode Set.
#
# One term: a single (possibly escaped) rule character, a '[' Unicode set,
# a parenthesized sub-expression, a $variable reference, or '.'.
# 'expr-mod' is the state that follows a completed term.
term:
    escaped              n expr-mod                                 doRuleChar
    white_space          n term
    rule_char            n expr-mod                                 doRuleChar
    '['                    scan-unicode-set      ^expr-mod
    '('                  n term                  ^expr-mod          doLParen
    '$'                    scan-var-name         ^term-var-ref
    '.'                  n expr-mod                                 doDotAny
    default                errorDeath                               doRuleError
#
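#  term-var-ref   We've just finished scanning a reference to a $variable.
#                 Check that the variable was defined.
#                 The variable name scanning is in common with assignment statements,
#                 so the check can't be done there.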
# Single unconditional row: run the variable-definition check, then
# continue with the optional term modifiers in 'expr-mod'.
term-var-ref:
    default                expr-mod                                 doCheckVarDef
#
#   expr-mod      We've just finished scanning a term, now look for the optional
# trailing '*', '?', '+'
#
# Optional postfix operator after a term.  With or without one, scanning
# continues in 'expr-cont'.
expr-mod:
    white_space          n  expr-mod
    '*'                  n  expr-cont                               doUnaryOpStar
    '+'                  n  expr-cont                               doUnaryOpPlus
    '?'                  n  expr-cont                               doUnaryOpQuestion
    default                 expr-cont
#
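#  expr-cont      Expression, continuation.  At a point where additional terms are
#                                            allowed, but not required.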
#
# Anything that can start another term is left unconsumed and handed to
# 'term' as a concatenation.  '/', '{' and '|' introduce look-ahead, a tag
# and alternation respectively.  ')' or any other character pops back to
# the pushed state.
expr-cont:
    escaped                 term                                    doExprCatOperator
    white_space          n  expr-cont
    rule_char               term                                    doExprCatOperator
    '['                     term                                    doExprCatOperator
    '('                     term                                    doExprCatOperator
    '$'                     term                                    doExprCatOperator
    '.'                     term                                    doExprCatOperator
    '/'                     look-ahead                              doExprCatOperator
    '{'                  n  tag-open                                doExprCatOperator
    '|'                  n  term                                    doExprOrOperator
    ')'                  n  pop                                     doExprRParen
    default                 pop                                     doExprFinished
#
# look-ahead Scanning a '/', which identifies a break point, assuming that the
#                 remainder of the expression matches.
#
#                 Generate a parse tree as if this was a special kind of input symbol
#                 appearing in an otherwise normal concatenation expression.
# Entered from the '/' rows of the expression-continuation states, which do
# not consume the '/'; it is consumed here.
look-ahead:
    '/'                  n  expr-cont-no-slash                      doSlash
    default                 errorDeath
#
# expr-cont-no-slash Expression, continuation. At a point where additional terms are
#                                            allowed, but not required.  Just like
#                                            expr-cont, above, except that no further look-ahead
#                                            symbol is permitted.
#
# Same rows as 'expr-cont' minus the '/' (look-ahead) and '{' (tag) rows.
# NOTE(review): the white_space row returns to 'expr-cont', not to this
# state, which would re-allow '/' after white space.  It is kept exactly as
# found; confirm against the upstream grammar before changing it.
expr-cont-no-slash:
    escaped                 term                                    doExprCatOperator
    white_space          n  expr-cont
    rule_char               term                                    doExprCatOperator
    '['                     term                                    doExprCatOperator
    '('                     term                                    doExprCatOperator
    '$'                     term                                    doExprCatOperator
    '.'                     term                                    doExprCatOperator
    '|'                  n  term                                    doExprOrOperator
    ')'                  n  pop                                     doExprRParen
    default                 pop                                     doExprFinished
#
#   tags             scanning a '{', the opening delimiter for a tag that identifies
#                    the kind of match.  Scan the whole {dddd} tag, where d=digit
#
# Just inside the tag: skip white space, then require a digit.  The first
# digit is not consumed here; 'tag-value' consumes it.
tag-open:
    white_space          n  tag-open
    digit_char              tag-value                               doStartTagValue
    default                 errorDeath                              doTagExpectedError
# Accumulate the digits of the tag value.  White space or '}' ends the
# number; any other character is an error, as in 'tag-open'.
tag-value:
    white_space          n  tag-close
    '}'                     tag-close
    digit_char           n  tag-value                               doTagDigit
    default                 errorDeath                              doTagExpectedError
# After the digits: skip white space and require the closing '}', then
# continue the expression in a state that does not accept another tag.
tag-close:
    white_space          n  tag-close
    '}'                  n  expr-cont-no-tag                        doTagValue
    default                 errorDeath                              doTagExpectedError
#
#  expr-cont-no-tag    Expression, continuation.  At a point where additional terms are
#                                            allowed, but not required.  Just like
#                                            expr-cont, above, except that no "{ddd}" tagging is permitted.
#
# Same rows as 'expr-cont' except that there is no '{' row, so a second
# tag cannot follow directly.
expr-cont-no-tag:
    escaped                 term                                    doExprCatOperator
    white_space          n  expr-cont-no-tag
    rule_char               term                                    doExprCatOperator
    '['                     term                                    doExprCatOperator
    '('                     term                                    doExprCatOperator
    '$'                     term                                    doExprCatOperator
    '.'                     term                                    doExprCatOperator
    '/'                     look-ahead                              doExprCatOperator
    '|'                  n  term                                    doExprOrOperator
    ')'                  n  pop                                     doExprRParen
    default                 pop                                     doExprFinished
#
#   Variable Name Scanning.
#
#                    The state that branched to here must have pushed a return state
#                    to go to after completion of the variable name scanning.
#
#                    The current input character must be the $ that introduces the name.
#                    The $ is consumed here rather than in the state that first detected it
#                    so that the doStartVariableName action only needs to happen in one
#                    place (here), and the other states don't need to worry about it.
#
# Consume the '$' that introduces a variable name.  States that branch here
# leave the '$' unconsumed and push the state to return to.
scan-var-name:
    '$'                  n scan-var-start                           doStartVariableName
    default                errorDeath
# First character of the variable name: a name_start_char is required and
# consumed; any other character is an error.
scan-var-start:
    name_start_char      n scan-var-body
    default                errorDeath                               doVariableNameExpectedErr
# Consume the rest of the variable name.  The first non-name character is
# left unconsumed and control pops back to the pushed return state.
scan-var-body:
    name_char            n scan-var-body
    default                pop                                      doEndVariableName
#
#  scan-unicode-set   Unicode Sets are parsed by the UnicodeSet class.
#                     Within the RBBI parser, after finding the first character
#                     of a Unicode Set, we just hand the rule input at that
#                     point off to the Unicode Set constructor, then pick
#                     up parsing after the close of the set.
#
#                     The action for this state invokes the UnicodeSet parser.
#
# All three accepted first characters ('[', 'p', 'P') use the same action
# and then pop back to the pushed return state.
scan-unicode-set:
    '['                  n pop                                      doScanUnicodeSet
    'p'                  n pop                                      doScanUnicodeSet
    'P'                  n pop                                      doScanUnicodeSet
    default                errorDeath
#
#  assign-or-rule.   A $variable was encountered at the start of something, could be
#                    either an assignment statement or a rule, depending on whether an '='
#                    follows the variable name.  We get to this state when the variable name
#                    scanning does a return.
#
# '=' after the variable name makes it the target of an assignment.
# Otherwise the variable was the first term of a rule.
assign-or-rule:
    white_space          n assign-or-rule
    '='                  n term                  ^assign-end        doStartAssign
    default                term-var-ref          ^break-rule-end
#
# assign-end This state is entered when the end of the expression on the
# right hand side of an assignment is found. We get here via
#                    a pop; this state is pushed when the '=' in an assignment is found.
#
#                    The only thing allowed at this point is a ';'.  The RHS of an
#                    assignment must look like a rule expression, and we come here
#                    when what is being scanned no longer looks like an expression.
#
# Only ';' may follow the right-hand side of an assignment; anything else
# is reported as an error in the assignment expression.
assign-end:
    ';'                  n start                                    doEndAssign
    default                errorDeath                               doRuleErrorAssignExpr
#
# errorDeath.   This state is specified as the next state whenever a syntax error
#               in the source rules is detected.  Barring bugs, the state machine will never
#               actually get here, but will stop because of the action associated with the error.
# But, just in case, this state asks the state machine to exit.
# Terminal error state: whatever the input, consume it, stay here, and
# request exit via doExit.
errorDeath:
    default              n errorDeath                               doExit