/* * art.c * * ACCENT RUNTIME * * Copyright (C) 1984, 1999 Friedrich Wilhelm Schroeer * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#if DYNAMICITEMS int ITEMLIMIT; #else #define ITEMLIMIT 285000 #endif
#define ITEMINCR 285000
# if DYNAMICITEMS long *dot, *back, *left, *sub; #else long dot[ITEMLIMIT], back[ITEMLIMIT], left[ITEMLIMIT], sub[ITEMLIMIT]; #endif /* * An "item" is a quadrupel < D, B, L, S > , where * * D is a rule currently being processed. * A working point ("dot", written as "*") * devides the right hand side into part already being processed (alpha) * and a part that still needs to be processed (beta) * M : alpha * beta * * B (back-pointer) is a reference to a list of items in which the * processing of rule rule * D = M : M : alpha * beta * startet. This list contains a start item * I' = < D', B', L', S' > * where * D' = M : * alpha beta * * L (left-pointer) * * S (sub-pointer) * * [ documentation to be completed in forthcoming release ....... ] * * If I is the index of an item < D, B, L, S > then * dot[I] = D * back[I] = B * left[I] = L * sub[I] = S
*/
long thislist; /* * current itemlist (index of first item of list)
*/
long last_item; /* * index of last item in item list * position last_item+1 is used for sentinel during searching * last_item+1 must not exceed ITEMLIMIT-1 (i.e. last_item < ITEMLIMIT-2) *
*/
long specialitemadded; /* * an item with index i such that back[i]==thislist * has been added to the current item list * This requires a rescan of previous items during closure computation * (which otherwise is not neccessary)
*/
externint yygrammar[]; /* * encoded grammar * defined in 'yygrammar.c' * * * structure of encoding * * a rule * [r] M0 : M1 ... Mn * with number r, lhs M0, and rhs M1 .. M1 * is encoded as follows * * +-- chain<-+ * | M1*-------> first rule for M1 * | ... | * | Mn* | * | -M0*---+ * | r * +-> next rule for M0 * * chain : index of start of next rule for M0 (or 0) * * M1* : encoding of rhs member Mi (1 <= i <= n) * ... Mi nonterminal: index of start of encoding of first rule for Mi * Mn* Mi terminal: see below * * -M0* : negative index of start of encoding of first rule for M0 * * r : rule number * * The grammar starts with the rule * [0] YYSTART : UserRoot EOF * which is encoded as follows: * * 1: 0 (chain) * 2: 6 (first rule of user root) (= STARTUSERGRAMMAR) * 3: EOF (eof token) * 4: -1 (points to start of rule) * 5: 0 (rule number)
*/
#define STARTUSERGRAMMAR 8
externint yyannotation[]; /* defined in 'yygrammar.c' */
externint yycoordinate[]; /* defined in 'yygrammar.c' */
externint c_length; /* * length of yygrammar * defined in 'yygrammar.c'
*/
/* nonterminals are encoded as * 1 ... term_base-1 * the eof terminal is encoded as * termbase * single character terminals are encoded as * term_base+1 ... term_base+max_char * named terminals are encoded as * term_base+max_char+1 ... * * yylex() returns an original token code * that must be incremented by term_base
*/
/*============================================================================*/ /* DIRECTOR SETS */ /*============================================================================*/
long sym; /* current input token */
long lookaheadsym; /* next input token */
externlong yypos; /* this variable must be set by the scanner */
#if BIGHASH externlong DHITS[]; long BHITS[ITEMLIMIT]; /* * search optimisation * * DHITS[d] == thislist: * there is an item with dot d in the current itemlist * (if not: no search neccessary) * * BHITS[b] == thislist: * there is an item with backpointer b in the current itemlist * (if not: no search neccessary)
*/ #endif
#if STATISTICS int iterationcount = 0; int itemlistcount; int notviable = 0; int search = 0; int nosearch = 0; int nosearch_b = 0; int nosearch_h = 0; /* no_search == 1: * no need to call SEARCH because no item with same hash code
*/ int foundsame = 0; int foundnew = 0; int completerstep = 0; int truecompleterstep = 0; int calls_additem = 0; int itemsadded = 0; int rules_rej = 0; /* rules rejected by 'lookup_dirset' (predictor) */ int tokens_rej = 0; /* tokens rejected by 'is_viable' (kernel/completer) */ int nonterms_rej = 0; /* nonterms rejected by 'is_viable' (kernel/completer) */
int GOOD = 0; /* number of items with dot preceeding a token */ /* that contribute to the next kernel */ int BAD = 0; /* number of items with dot preceeding a token */ /* that don't contribute to the next kernel */ #endif
PRIVATE print_item(p) int p; /* * print the item with index p
*/
{ int i, b, e, l, k;
i = dot[p];
printf(" %d: ", p); if (dot[p] == 0 && sub[p] == 0) {
printf("[ separator-item ]\n"); return;
} if (i <= 5) {
b = 1;
} else {
b = i-1; while(yygrammar[b] >= 0) b--;
b += 2;
} /* b points to the start of the rule */
e = i; while(yygrammar[e] >= 0) e++; /* e points to the end of the rule, i.e. the lhscode */
l = - yygrammar[e]; /* l is the lhs */
printf("%s :", yyprintname(l));
k = b+1; /* k points to the first member */ while(yygrammar[k] > 0) { if (k == i) printf(" *"); /* print member yygrammar[k] */ if (yygrammar[k] == eofsym) {
printf(" ");
} elseif (yygrammar[k] > term_base) { if (yygrammar[k] < term_base+max_char+1) {
printf(" '%c'", yygrammar[k]-term_base);
} else
printf(" %s", yyprintname(yygrammar[k]));
} else {
printf(" %s", yyprintname(yygrammar[k]));
}
k++;
} if (yygrammar[i] <= 0) printf(" *");
printf(" (back:%d sub:%d left:%d)\n", back[p], sub[p], left[p]);
}
PRIVATE print_coordinate(i) int i; /* * print source coordinate (of grammar file) with code i * the number encodes both, line and column information
*/
{ int pos = yycoordinate[i]; int l = pos / 1000; int c = pos % 1000;
PRIVATE print_tree(i) int i; /* * print tree for item with index i
*/
{ staticint indent = 0; int k;
/* rule number if item at end of rule */ if (yygrammar[dot[i]] < 0 ) { /* end of rule */ for (k = 1; k <= indent; k++) printf(" ");
printf("%s alternative at ", yyprintname(-yygrammar[dot[i]]));
print_coordinate(dot[i]+1);
printf(" {\n");
indent++;
}
/* left brothers */
if (left[i]) print_tree(left[i]);
/* this son */
if (left[i]) { int sym = yygrammar[dot[i]-1];
if (sym > term_base) { for (k = 1; k <= indent; k++) printf(" "); if (sym < term_base+max_char+1)
printf("'%c'\n", yygrammar[dot[i]-1]-term_base); else
printf("%s\n", yyprintname(sym));
}
}
/* subtree for this son */
if (sub[i]) print_tree(sub[i]);
if (yygrammar[dot[i]] < 0 ) { /* end of rule */
indent--; for (k = 1; k <= indent; k++) printf(" ");
printf("}\n");
}
}
/*============================================================================*/ /* DIRECTOR SETS */ /*============================================================================*/
PRIVATEint lookup_dirset(ruleptr) long ruleptr; /* * Let 'rule' be the rule be the rule into which 'ruleptr' points. * Let 'tkn' be the code of the next token. * Return ('tkn' is in the director set of 'rule').
*/
{ int p; int rule; int tkn;
/* find rule number */
p = ruleptr; while (yygrammar[p] >= 0) p++;
p++;
rule = yygrammar[p];
PRIVATEint is_viable (d) int d; /* * d is a pointer into the encoded grammar * Returns true if the the symbol yygrammar[d] (token or nonterminal) * is a valid continuation of the parse: * can it start with the current lookahead token? * If the symbol is a token it is compared with the lookahead token * If symbol is a nonterminal it is checked whether the lookahead token * appears in the director sets of the rules for the nonterm of the symbol * * NOTE: the union of the director sets should be computed statically
*/
{ if (yygrammar[d] >= term_base) { /* token */ if (yygrammar[d] == lookaheadsym) { return 1;
} else { #if STATISTICS
tokens_rej++; #endif return 0;
}
}
elseif (yygrammar[d] > 0 ) { /* nonterm */
int start;
start = yygrammar[d]; /* start points to the first rule for the nonterm */ do { int p; int rule;
p = start+1; while (yygrammar[p] >= 0) {
p++;
} /* p now points to negative lhs encoding */
rule = yygrammar[p+1]; if (yydirset(rule, lookaheadsym-term_base)) { return 1;
}
start = yygrammar[start];
} while (start);
int hash[HSIZE]; /* * hash table to speed up lookup-function * if an item with back pointer b and dot d is entered * into the current item list * an entry with a corresponding hash code is set
*/
PRIVATE readsym () /* * read next token * current token: 'sym' * following token: 'lookaheadsym' * extend the list of lexical values by calling * next_lexval() provided by 'yyactions.c'
*/
{
/* subptr points to an item */ /* return the prio of corresponding rule */
{ int i;
/* find end of rule */
i = dot[subptr]; while (yygrammar[i] > 0) i++; /* i points to the negative lhs encoding */
i++; /* i now points to the rule number, this is also the index of the prio */
PRIVATEint test_for_cycle(subtree, container) int subtree; int container; /* * return true if * the tree to which 'container' (a "subpointer" of an item) points * contains the tree to which 'subtree' (also a "subpointer") points
*/
{ if (container < subtree) { /* an earlier item cannot refer a later one */ return 0;
} if (container == subtree) { return 1;
} if (sub[container]) { if (test_for_cycle(subtree, sub[container])) return 1;
} if (left[container]) { if (test_for_cycle(subtree, left[container])) return 1;
} return 0;
}
PRIVATE combi_ambiguity(i, d, l, s) int i, d, l, s; /* * The item with index i * and an item with dot d, leftpointer l, and subpointer s * introduce a combi ambiguity
*/
{ if (left[i] != l) {
/* Combi Ambiguity */
int left1, left2, sub1, sub2, annotation; int selected_left, selected_sub;
#if TRACE
printf("Combi Ambiguity left[%d] = %d, l = %d\n", i, left[i], l); #endif
{ int k;
k = dot[i]; while (yygrammar[k] > 0) k++;
printf("There are two different parses\n");
printf("for the beginning of ``%s'', alternative at ",
yyprintname(-yygrammar[k]));
print_coordinate(k+1);
printf(",\n");
}
printf("upto and containing ``%s'' at ", yyprintname(yygrammar[d-1]));
print_coordinate(d-1);
printf(".\n");
printf("\n");
printf("For ``%s'' at ", yyprintname(yygrammar[d-1]));
print_coordinate(d-1);
printf(",\n"); if (left1 > left2) {
printf("use %%short annotation to select first parse,\n");
printf("use %%long annotation to select second parse.\n");
} else {
printf("use %%long annotation to select first parse,\n");
printf("use %%short annotation to select second parse.\n");
}
printf("\nEND OF GRAMMAR DEBUG INFORMATION\n\n");
yyerror("source text uncovers unhandled grammar ambiguity"); exit(1);
PRIVATE simple_ambiguity(i, d, l, s) int i, d, l, s; /* * The item with index i * and an item with dot d, leftpointer l, and subpointer s * introduce a simple ambiguity
*/
{ /* Simple Ambiguity */
printf("\n");
printf("GRAMMAR DEBUG INFORMATION\n");
printf("\n");
printf("Grammar ambiguity detected.\n");
printf
("Two different ``%s'' derivation trees for the same phrase.\n",
yyprintname(yygrammar[d-1]));
if (test_for_cycle(s, sub[i])) { /* not possible */
printf("Tree 1 contains tree 2 as subtree.\n");
printf
("Use %%prio annotation to select the second tree.\n");
printf("An annotation selecting the first tree\n");
printf("would not resolve the ambiguity.\n");
} elseif (test_for_cycle(sub[i], s)) {
printf("Tree 2 contains tree 1 as subtree.\n");
printf
("Use %%prio annotation to select the first tree.\n");
printf("An annotation selecting the second tree\n");
printf("would not resolve the ambiguity.\n");
} else {
printf("Use %%prio annotation to select an alternative.\n");
}
printf("\nEND OF GRAMMAR DEBUG INFORMATION\n\n");
yyerror("source text uncovers unhandled grammar ambiguity"); exit(1);
} elseif (prio1 > prio2) { #if TRACE
printf("old value wins\n"); #endif
} else { #if TRACE
printf("new value wins\n");
printf("sub[%d] was %d\n", i, sub[i]);
printf("sub[%d] becomes %d\n", i, s); #endif
#if DYNAMICCYCLECHECK if (s >= i) { if (test_for_cycle(i, s)) {
printf("\n");
printf("GRAMMAR DEBUG INFORMATION\n");
printf("\n");
printf("Annotation for ``%s'' allows cyclic derivation.\n",
yyprintname(yygrammar[d-1]));
printf("\nEND OF GRAMMAR DEBUG INFORMATION\n\n");
yyerror("source text uncovers unhandled grammar ambiguity"); exit(1);
}
} #endif
sub[i] = s;
}
}
PRIVATE SEARCH(d, b, l, s) long d, b, l, s; /* * An item with dot d, backpointer b, leftpointer l, subpointer s * has been preliminary added to the current list at position * last_item+1 (sentinel) * if there is no other item with dot d and backpointer b * make this item permanent * otherwise, if the old item has a different backpointer/subpointer * then an ambiguity is detected * if the backpointer/subpointer is the same * the new item is already present
*/
{ registerlong i;
PRIVATE additem ( d, b, l, s) long d, b, l, s; /* * add an item with dot d, backpointer b, leftpointer l, and subpointer s * to the current item list * if there is already an item with dot d and backpointer b * then the grammar is ambigous (see 'SEARCH', which actually adds the item) * * hash optimization: * if there is no item with the same hash code for d and b * it is not neccessary to invoke SEARCH because the item is unique in the * current list
*/
{ #if STATISTICS || TRACE || ILIST int old = last_item; #endif
#if HASHING if (! hashed(d, b)) { #if STATISTICS
nosearch_h++; #endif #if BIGHASH
DHITS[d] = thislist;
BHITS[b] = thislist; #endif
last_item++; if (last_item==(ITEMLIMIT-2)) table_full();
sethash(d, b);
} else { #endif #if BIGHASH if (DHITS[d] == thislist) { /* there may be already an item with code d */ if (BHITS[b] == thislist) { /* there may be already an item with code b */ #if STATISTICS
search++; #endif
SEARCH(d, b, l, s);
} else
{ /* no search necessary: no item with code b entered til now */
#if STATISTICS
nosearch_b++; #endif
BHITS[b] = thislist;
last_item++; if (last_item==(ITEMLIMIT-2)) table_full(); #if HASHING
sethash(d, b); #endif
}
} else { /* no search necessary: no item with code d entered til now */
PRIVATE kernel (prevlist) long prevlist; /* * compute the kernel of the next item list * prevlist points to the previous list * * KERNEL[i] = * { * < N : alpha yygrammar[i] * beta, B,I,0 > * | * < N : alpha * yygrammar[i] beta, B,_,_ > is in IL[i-1] and has index I * }
*/
{ long i; #if TRACE
printf("kernel:\n"); #endif
i = prevlist; while (dot[i]) { if (yygrammar[dot[i]] >= term_base )
{ if (yygrammar[dot[i]] == sym) { #if CHECKVIABLE if (is_viable(dot[i]+1)) #endif
{
additem(dot[i]+1,back[i],i,0);
} #if STATISTICS
GOOD++; #endif
} else { #if STATISTICS
BAD++; #endif
}
}
i++;
}
}
PRIVATE predictor (item) long item; /* * predictor step for item 'item' * * PREDICTOR: * The dot is before a nonterm * add the rules for that nonterm (with the dot at the beginning) * * If * < N : alpha * M beta, B,L,S > * is in IL * then add * < M : * gamma, B',0,0 > * if there is not yet an an item with the first two components * and there is a rule N : gamma * B' is a reference to IL[i]
*/
{ long ruleptr;
ruleptr = yygrammar[dot[item]]; /* xxxxxx printf("ruleptr = %d\n", ruleptr);
*/ #if TRACE
printf("predictor for item %d:\n", item); #endif do { int old = last_item;
#if ! LOOKAHEAD /* (1) ORIGINAL VERSION */
additem(ruleptr+1,thislist,0,0); if ((back[last_item]==thislist) && (last_item>old)) specialitemadded=1; #else /* (2) IMPROVEMENT add test: is current symbol (lookaheadsym) in director set of that rule ?
*/ if (lookup_dirset(ruleptr)) {
additem(ruleptr+1,thislist,0,0); if ((back[last_item]==thislist) && (last_item>old))
specialitemadded=1;
} else { #if TRACE
printf("rejected by lookup_dirset(%d)\n", ruleptr); #endif #if STATISTICS
rules_rej++; #endif
} #endif
PRIVATE completer (item) long item; /* * completer step for item 'item' * * COMPLETER * The dot is at the end of a rule * add the item that triggered this rule to be included * where the dot is advanced after the nonterm * * If * < M : gamma * , B,L,S > * is in CL (with index ITEM) * and there is an item * < N : alpha * M beta, B',L',S' > * in the item list to which the back-pointer B refers (with index I) * then add * < N : alpha M * beta, B',I,ITEM > * if there is not yet an item with the first two components
*/
{ long lhs, old; registerint i; int dot_i;
#if TRACE
printf("completer for item %d:\n", item); #endif
lhs=-yygrammar[dot[item]];
i=back[item];
/* loop over all items in earlier item list */
dot[last_item+1] = 0; /* sentinel */ while (dot_i = dot[i]) {
PRIVATE closure () /* * compute closure for the kernel of the current item list * * CLOSURE * apply PREDICTOR and COMPLETOR * as long as there are changes
*/
{ long i; int oldend;
#if TRACE
printf("closure\n"); #endif
specialitemadded=0; do { #if TRACE
printf("closure iteration\n"); #endif #if STATISTICS
iterationcount++; #endif
i=thislist;
oldend=last_item; while (i<=last_item) { if (yygrammar[dot[i]]<0) completer(i); elseif (yygrammar[dot[i]]<term_base) predictor(i);
i++;
} /* } while (specialitemadded && (oldend!=last_item));
*/
} while ((oldend!=last_item));
}
PRIVATE initial_itemlist() /* * compute initial item list * its kernel is given by the item * YYSTART : * UserRoot EOF * for which the closure is computed *
*/
{
#if DYNAMICITEMS
ITEMLIMIT = ITEMINCR;
dot = (long *) malloc(ITEMLIMIT* sizeof(long)); if (! dot) yymallocerror();
back = (long *) malloc(ITEMLIMIT* sizeof(long)); if (! back) yymallocerror();
left = (long *) malloc(ITEMLIMIT* sizeof(long)); if (! left) yymallocerror();
sub = (long *) malloc(ITEMLIMIT* sizeof(long)); if (! sub) yymallocerror(); #endif
PRIVATE itemlist_sequence() /* * compute the sequence of item lists: * initial_itemlist * and then next_itemlist for each input token
*/
{ #if ! ERREXIT
waserror = 0; #endif
last_item=0;
initial_itemlist();
do {
readsym();
next_itemlist();
} while (sym != eofsym);
}
/*============================================================================*/ /* RETURN LEFTPARSE STEP BY STEP */ /*============================================================================*/
/* * when the recognizer has terminated * leftpointers and sub pointers represent the parse tree * (starting with the item YYSTART : UserRoot EOF * ) * the function 'yyselect' returns the rule number * one after each other in the order of a left derivation * as required by the tree walker implemented in yyactions.c * it uses a stack to keep track of items that need to be processed later
*/
#define STACKINCR 200 int STACKSIZE; int *stack; int stptr = 0;
PUBLICint yyselect() /* * return the next rule number (for the left-derivation) * * this function is called by the generated tree walker * * the stack always contains pointers to items that still must be processed * to produce a left-derivation * the top element must be processed first * if the dot appears inside a rule * M : alpha N * beta * then the items representing the parse for beta are already on the stack * and the items representing alpha N must be pushed * they are represented by the sub pointer of the item * (corresponding to the parse for N) * and the leftpointer of the item * (corresponding to the parse for alpha) * if the item has the form * M : gamma * * (i.e. the rule has been completed) * first the items representing gamma are pushed * then the rule number is returned * subsequent calls will process the items * representing gamma *
*/
{ int i; while (1) {
i = pop(); if (sub[i]) push(sub[i]); if (left[i]) push(left[i]); if (yygrammar[dot[i]] < 0 ) { return yygrammar[dot[i]+1];
}
}
}
/*============================================================================*/ /* MAIN FUNCTION YYPARSE */ /*============================================================================*/
PUBLICint yyparse() /* * main function of the parser * * this function is called by the user's program * * compute the sequence of item lists * which implictely contain the parse tree * and then invokes the generated tree walker YYSTART * which in turn calls yyselect() to obtain the rule numbers * in the order of a left derivation
*/
{ #if BIGHASH
init_DHITS(); #endif
init_dirsets();
lookaheadsym = yylex()+term_base;
lookaheadpos = yypos;
first_lexval();
itemlist_sequence();
#if WALK /* item 'thislist' represents the item * YYSTART : UserRoot EOF * * i.e. the root of the derivation tree
*/
init_stack();
push(thislist);
init_lexelem();
YYSTART(); #endif