/* * Copyright (c) 2002 - 2011 Tony Finch <dot@dotat.at> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
*/
/* * unifdef - remove ifdef'ed lines * * This code was derived from software contributed to Berkeley by Dave Yost. * It was rewritten to support ANSI C by Tony Finch. The original version * of unifdef carried the 4-clause BSD copyright licence. None of its code * remains in this version (though some of the names remain) so it now * carries a more liberal licence. * * Wishlist: * provide an option which will append the name of the * appropriate symbol after #else's and #endif's * provide an option which will check symbols after * #else's and #endif's to see that they match their * corresponding #ifdef or #ifndef * * These require better buffer handling, which would also make * it possible to handle all "dodgy" directives correctly.
*/
/*
 * Types of input lines.
 *
 * The order of the enumerators is significant:
 *  - the #elif results (LT_ELIF, LT_ELTRUE, LT_ELFALSE) are laid out in
 *    the same order as the #if results (LT_IF, LT_TRUE, LT_FALSE) so a
 *    #if result can be converted by adding (LT_ELIF - LT_IF);
 *  - LT_DODGY is a flag value that is added to a directive type, so the
 *    "dodgy" variants occupy LT_DODGY .. LT_DODGY_LAST.
 */
typedef enum {
	LT_TRUEI,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_ERROR,		/* unevaluable #if */
	LT_COUNT		/* number of line types */
} Linetype;
/*
 * State of #if processing.  IS_COUNT is not a state; it is the number
 * of states.
 */
typedef enum {
	IS_OUTSIDE,
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT
} Ifstate;
/*
 * State of the comment parser.  NO_COMMENT is deliberately equal to
 * false so that the state can be tested directly as a boolean
 * ("are we inside a comment or literal?").
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;
/* state of preprocessor line parser */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;
/*
 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
 */
#define	MAXDEPTH	64		/* maximum #if nesting */
#define	MAXLINE		4096		/* maximum length of line */
#define	MAXSYMS		4096		/* maximum number of symbols */

/*
 * Sometimes when editing a keyword the replacement text is longer, so
 * we leave some space at the end of the tline buffer to accommodate this.
 */
#define	EDITSLOP	10

/*
 * For temporary filenames
 */
#define	TEMPLATE	"unifdef.XXXXXX"
/* * Globals.
*/
staticbool compblank; /* -B: compress blank lines */ staticbool lnblank; /* -b: blank deleted lines */ staticbool complement; /* -c: do the complement */ staticbool debugging; /* -d: debugging reports */ staticbool iocccok; /* -e: fewer IOCCC errors */ staticbool strictlogic; /* -K: keep ambiguous #ifs */ staticbool killconsts; /* -k: eval constant #ifs */ staticbool lnnum; /* -n: add #line directives */ staticbool symlist; /* -s: output symbol list */ staticbool symdepth; /* -S: output symbol depth */ staticbool text; /* -t: this is a text file */
staticconstchar *symname[MAXSYMS]; /* symbol name */ staticconstchar *value[MAXSYMS]; /* -Dsym=value */ staticbool ignore[MAXSYMS]; /* -iDsym or -iUsym */ staticint nsyms; /* number of symbols */
static FILE *input; /* input file pointer */ staticconstchar *filename; /* input file name */ staticint linenum; /* current line number */ static FILE *output; /* output file pointer */ staticconstchar *ofilename; /* output file name */ staticbool overwriting; /* output overwrites input */ staticchar tempname[FILENAME_MAX]; /* used when overwriting */
staticchar tline[MAXLINE+EDITSLOP];/* input buffer plus space */ staticchar *keyword; /* used for editing #elif's */
/*
 * A state transition function alters the global #if processing state
 * in a particular way. The table below is indexed by the current
 * processing state and the type of the current line.
 *
 * Nesting is handled by keeping a stack of states; some transition
 * functions increase or decrease the depth. They also maintain the
 * ignore state on a stack. In some complicated cases they have to
 * alter the preprocessor directive, as follows.
 *
 * When we have processed a group that starts off with a known-false
 * #if/#elif sequence (which has therefore been deleted) followed by a
 * #elif that we don't understand and therefore must keep, we edit the
 * latter into a #if to keep the nesting correct. We use memcpy() to
 * overwrite the 4 byte token "elif" with "if  " without a '\0' byte.
 *
 * When we find a true #elif in a group, the following block will
 * always be kept and the rest of the sequence after the next #elif or
 * #else will be discarded. We edit the #elif into a #else and the
 * following directive to #endif since this has the desired behaviour.
 *
 * "Dodgy" directives are split across multiple lines, the most common
 * example being a multi-line comment hanging off the right of the
 * directive. We can handle them correctly only if there is no change
 * from printing to dropping (or vice versa) caused by that directive.
 * If the directive is the first of a group we have a choice between
 * failing with an error, or passing it through unchanged instead of
 * evaluating it. The latter is not the default to avoid questions from
 * users about unifdef unexpectedly leaving behind preprocessor directives.
 */
typedef void state_fn(void);
/*
 * Write a line to the output or not, according to command line options.
 *
 * keep: true if the state machine wants the current line (tline) kept;
 *       the decision is inverted when the complement option is set.
 *
 * Nothing is written in symbol-list mode.  A kept line is either counted
 * as a compressed blank line or written to the output, preceded by a
 * #line directive if line numbering is on and lines have been deleted
 * since the last write.  A dropped line is optionally replaced by an
 * empty line and sets the exit status to 1.
 */
static void
flushline(bool keep)
{
	if (symlist)
		return;
	if (keep ^ complement) {
		/* blank means nothing but spaces, tabs and line endings */
		bool blankline = tline[strspn(tline, " \t\r\n")] == '\0';
		if (blankline && compblank && blankcount != blankmax) {
			delcount += 1;
			blankcount += 1;
		} else {
			/*
			 * The #line directive belongs with the text it
			 * describes, so it must go to the same stream as
			 * the line itself rather than to stdout.
			 */
			if (lnnum && delcount > 0)
				fprintf(output, "#line %d%s", linenum, newline);
			fputs(tline, output);
			delcount = 0;
			blankmax = blankcount = blankline ? blankcount + 1 : 0;
		}
	} else {
		if (lnblank)
			fputs(newline, output);
		exitstat = 1;
		delcount += 1;
		blankcount = 0;
	}
	/* keep the output in step with the debugging reports */
	if (debugging)
		fflush(output);
}
/*
 * The driver for the state machine.
 *
 * Repeatedly parses a line and calls the transition function selected
 * by the current #if state (top of the ifstate stack) and the type of
 * the line.  The loop has no exit condition of its own; presumably the
 * transition for end-of-file terminates the program.
 */
static void
process(void)
{
	/* When compressing blank lines, act as if the file
	   is preceded by a large number of blank lines. */
	blankmax = blankcount = 1000;
	for (;;) {
		Linetype lineval = parseline();
		trans_table[ifstate[depth]][lineval]();
		debug("process line %d %s -> %s depth %d",
		    linenum, linetype_name[lineval],
		    ifstate_name[ifstate[depth]], depth);
	}
}
/*
 * Flush the output and handle errors.
 *
 * In symbol-depth mode a final newline is printed if any symbol was
 * listed.  If closing the output fails a warning is issued and the
 * program exits with status 2; when overwriting, the temporary file is
 * removed first so the input file is left unchanged.
 */
static void
closeout(void)
{
	if (symdepth && !zerosyms)
		printf("\n");
	if (fclose(output) == EOF) {
		warn("couldn't write to %s", ofilename);
		if (overwriting) {
			unlink(tempname);
			errx(2, "%s unchanged", filename);
		} else {
			exit(2);
		}
	}
}
/*
 * Clean up and exit.
 *
 * Reports an error if the input ended inside a comment or literal,
 * closes the output, and when overwriting renames the temporary file
 * over the output file.  If the rename fails the temporary file is
 * removed and the program exits with status 2; otherwise it exits with
 * the accumulated exit status.
 */
static void
done(void)
{
	if (incomment)
		error("EOF in comment");
	closeout();
	if (overwriting && rename(tempname, ofilename) == -1) {
		warn("couldn't rename temporary file");
		unlink(tempname);
		errx(2, "%s unchanged", ofilename);
	}
	exit(exitstat);
}
/* * Parse a line and determine its type. We keep the preprocessor line * parser state between calls in the global variable linestate, with * help from skipcomment().
*/ static Linetype
parseline(void)
{ constchar *cp; int cursym; int kwlen;
Linetype retval;
Comment_state wascomment;
linenum++; if (fgets(tline, MAXLINE, input) == NULL) return (LT_EOF); if (newline == NULL) { if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1)
newline = newline_crlf; else
newline = newline_unix;
}
retval = LT_PLAIN;
wascomment = incomment;
cp = skipcomment(tline); if (linestate == LS_START) { if (*cp == '#') {
linestate = LS_HASH;
firstsym = true;
cp = skipcomment(cp + 1);
} elseif (*cp != '\0')
linestate = LS_DIRTY;
} if (!incomment && linestate == LS_HASH) {
keyword = tline + (cp - tline);
cp = skipsym(cp);
kwlen = cp - keyword; /* no way can we deal with a continuation inside a keyword */ if (strncmp(cp, "\\\r\n", 3) == 0 ||
strncmp(cp, "\\\n", 2) == 0)
Eioccc(); if (strlcmp("ifdef", keyword, kwlen) == 0 ||
strlcmp("ifndef", keyword, kwlen) == 0) {
cp = skipcomment(cp); if ((cursym = findsym(cp)) < 0)
retval = LT_IF; else {
retval = (keyword[2] == 'n')
? LT_FALSE : LT_TRUE; if (value[cursym] == NULL)
retval = (retval == LT_TRUE)
? LT_FALSE : LT_TRUE; if (ignore[cursym])
retval = (retval == LT_TRUE)
? LT_TRUEI : LT_FALSEI;
}
cp = skipsym(cp);
} elseif (strlcmp("if", keyword, kwlen) == 0)
retval = ifeval(&cp); elseif (strlcmp("elif", keyword, kwlen) == 0)
retval = ifeval(&cp) - LT_IF + LT_ELIF; elseif (strlcmp("else", keyword, kwlen) == 0)
retval = LT_ELSE; elseif (strlcmp("endif", keyword, kwlen) == 0)
retval = LT_ENDIF; else {
linestate = LS_DIRTY;
retval = LT_PLAIN;
}
cp = skipcomment(cp); if (*cp != '\0') {
linestate = LS_DIRTY; if (retval == LT_TRUE || retval == LT_FALSE ||
retval == LT_TRUEI || retval == LT_FALSEI)
retval = LT_IF; if (retval == LT_ELTRUE || retval == LT_ELFALSE)
retval = LT_ELIF;
} if (retval != LT_PLAIN && (wascomment || incomment)) {
retval += LT_DODGY; if (incomment)
linestate = LS_DIRTY;
} /* skipcomment normally changes the state, except if the last line of the file lacks a newline, or
if there is too much whitespace in a directive */ if (linestate == LS_HASH) {
size_t len = cp - tline; if (fgets(tline + len, MAXLINE - len, input) == NULL) { /* append the missing newline */
strcpy(tline + len, newline);
cp += strlen(newline);
linestate = LS_START;
} else {
linestate = LS_DIRTY;
}
}
} if (linestate == LS_DIRTY) { while (*cp != '\0')
cp = skipcomment(cp + 1);
}
debug("parser line %d state %s comment %s line", linenum,
comment_name[incomment], linestate_name[linestate]); return (retval);
}
/*
 * Binary operators supported by the expression evaluator.
 *
 * Each operator receives the line types (at, bt) and the integer values
 * (a, b) of its two operands, stores the resulting value through p when
 * it is known, and returns the line type of the result.
 */

/*
 * Common tail for all operators: the result is unknown (LT_IF) if either
 * operand is unknown, otherwise v is stored and classified.
 */
static Linetype
op_strict(int *p, int v, Linetype at, Linetype bt)
{
	if (at == LT_IF || bt == LT_IF)
		return (LT_IF);
	*p = v;
	if (v)
		return (LT_TRUE);
	return (LT_FALSE);
}

static Linetype
op_lt(int *p, Linetype at, int a, Linetype bt, int b)
{
	return (op_strict(p, a < b, at, bt));
}

static Linetype
op_gt(int *p, Linetype at, int a, Linetype bt, int b)
{
	return (op_strict(p, a > b, at, bt));
}

static Linetype
op_le(int *p, Linetype at, int a, Linetype bt, int b)
{
	return (op_strict(p, a <= b, at, bt));
}

static Linetype
op_ge(int *p, Linetype at, int a, Linetype bt, int b)
{
	return (op_strict(p, a >= b, at, bt));
}

static Linetype
op_eq(int *p, Linetype at, int a, Linetype bt, int b)
{
	return (op_strict(p, a == b, at, bt));
}

static Linetype
op_ne(int *p, Linetype at, int a, Linetype bt, int b)
{
	return (op_strict(p, a != b, at, bt));
}

/*
 * Unless strict logic is requested, one known-true operand is enough
 * to make the whole disjunction true.
 */
static Linetype
op_or(int *p, Linetype at, int a, Linetype bt, int b)
{
	if (!strictlogic) {
		if (at == LT_TRUE || bt == LT_TRUE) {
			*p = 1;
			return (LT_TRUE);
		}
	}
	return (op_strict(p, a || b, at, bt));
}

/*
 * Unless strict logic is requested, one known-false operand is enough
 * to make the whole conjunction false.
 */
static Linetype
op_and(int *p, Linetype at, int a, Linetype bt, int b)
{
	if (!strictlogic) {
		if (at == LT_FALSE || bt == LT_FALSE) {
			*p = 0;
			return (LT_FALSE);
		}
	}
	return (op_strict(p, a && b, at, bt));
}
/*
 * An evaluation function takes three arguments, as follows: (1) a pointer to
 * an element of the precedence table which lists the operators at the current
 * level of precedence; (2) a pointer to an integer which will receive the
 * value of the expression; and (3) a pointer to a char* that points to the
 * expression to be evaluated and that is updated to the end of the expression
 * when evaluation is complete. The function returns LT_FALSE if the value of
 * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression
 * depends on an unknown symbol, or LT_ERROR if there is a parse failure.
 */
struct ops;

typedef Linetype eval_fn(const struct ops *, int *, const char **);

/* forward declarations, needed by the precedence table */
static eval_fn eval_table, eval_unary;
/* * The precedence table. Expressions involving binary operators are evaluated * in a table-driven way by eval_table. When it evaluates a subexpression it * calls the inner function with its first argument pointing to the next * element of the table. Innermost expressions have special non-table-driven * handling.
*/ staticconststruct ops {
eval_fn *inner; struct op { constchar *str;
Linetype (*fn)(int *, Linetype, int, Linetype, int);
} op[5];
} eval_ops[] = {
{ eval_table, { { "||", op_or } } },
{ eval_table, { { "&&", op_and } } },
{ eval_table, { { "==", op_eq },
{ "!=", op_ne } } },
{ eval_unary, { { "<=", op_le },
{ ">=", op_ge },
{ "<", op_lt },
{ ">", op_gt } } }
};
/* * Function for evaluating the innermost parts of expressions, * viz. !expr (expr) number defined(symbol) symbol * We reset the constexpression flag in the last two cases.
*/ static Linetype
eval_unary(conststruct ops *ops, int *valp, constchar **cpp)
{ constchar *cp; char *ep; int sym; bool defparen;
Linetype lt;
/* * Evaluate the expression on a #if or #elif line. If we can work out * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we * return just a generic LT_IF.
*/ static Linetype
ifeval(constchar **cpp)
{ int ret; int val = 0;
/* * Skip over comments, strings, and character literals and stop at the * next character position that is not whitespace. Between calls we keep * the comment state in the global variable incomment, and we also adjust * the global variable linestate when we see a newline. * XXX: doesn't cope with the buffer splitting inside a state transition.
*/ staticconstchar *
skipcomment(constchar *cp)
{ if (text || ignoring[depth]) { for (; isspace((unsignedchar)*cp); cp++) if (*cp == '\n')
linestate = LS_START; return (cp);
} while (*cp != '\0') /* don't reset to LS_START after a line continuation */ if (strncmp(cp, "\\\r\n", 3) == 0)
cp += 3; elseif (strncmp(cp, "\\\n", 2) == 0)
cp += 2; elseswitch (incomment) { case NO_COMMENT: if (strncmp(cp, "/\\\r\n", 4) == 0) {
incomment = STARTING_COMMENT;
cp += 4;
} elseif (strncmp(cp, "/\\\n", 3) == 0) {
incomment = STARTING_COMMENT;
cp += 3;
} elseif (strncmp(cp, "/*", 2) == 0) {
incomment = C_COMMENT;
cp += 2;
} elseif (strncmp(cp, "//", 2) == 0) {
incomment = CXX_COMMENT;
cp += 2;
} elseif (strncmp(cp, "\'", 1) == 0) {
incomment = CHAR_LITERAL;
linestate = LS_DIRTY;
cp += 1;
} elseif (strncmp(cp, "\"", 1) == 0) {
incomment = STRING_LITERAL;
linestate = LS_DIRTY;
cp += 1;
} elseif (strncmp(cp, "\n", 1) == 0) {
linestate = LS_START;
cp += 1;
} elseif (strchr(" \r\t", *cp) != NULL) {
cp += 1;
} else return (cp); continue; case CXX_COMMENT: if (strncmp(cp, "\n", 1) == 0) {
incomment = NO_COMMENT;
linestate = LS_START;
}
cp += 1; continue; case CHAR_LITERAL: case STRING_LITERAL: if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
(incomment == STRING_LITERAL && cp[0] == '\"')) {
incomment = NO_COMMENT;
cp += 1;
} elseif (cp[0] == '\\') { if (cp[1] == '\0')
cp += 1; else
cp += 2;
} elseif (strncmp(cp, "\n", 1) == 0) { if (incomment == CHAR_LITERAL)
error("unterminated char literal"); else
error("unterminated string literal");
} else
cp += 1; continue; case C_COMMENT: if (strncmp(cp, "*\\\r\n", 4) == 0) {
incomment = FINISHING_COMMENT;
cp += 4;
} elseif (strncmp(cp, "*\\\n", 3) == 0) {
incomment = FINISHING_COMMENT;
cp += 3;
} elseif (strncmp(cp, "*/", 2) == 0) {
incomment = NO_COMMENT;
cp += 2;
} else
cp += 1; continue; case STARTING_COMMENT: if (*cp == '*') {
incomment = C_COMMENT;
cp += 1;
} elseif (*cp == '/') {
incomment = CXX_COMMENT;
cp += 1;
} else {
incomment = NO_COMMENT;
linestate = LS_DIRTY;
} continue; case FINISHING_COMMENT: if (*cp == '/') {
incomment = NO_COMMENT;
cp += 1;
} else
incomment = C_COMMENT; continue; default:
abort(); /* bug */
} return (cp);
}
/*
 * Skip macro arguments.
 *
 * If, after any comments and whitespace, cp is at an opening
 * parenthesis, skip to just past the matching closing parenthesis (and
 * any comments that follow it).  If there is no parenthesis the
 * position after the leading comments is returned.  If the parentheses
 * are unbalanced the original position is returned.
 */
static const char *
skipargs(const char *cp)
{
	const char *ocp = cp;
	int level = 0;

	cp = skipcomment(cp);
	if (*cp != '(')
		return (cp);
	do {
		if (*cp == '(')
			level++;
		if (*cp == ')')
			level--;
		cp = skipcomment(cp+1);
	} while (level != 0 && *cp != '\0');
	if (level == 0)
		return (cp);
	else
	/* Rewind and re-detect the syntax error later. */
		return (ocp);
}
/*
 * Skip over an identifier.
 *
 * Advances cp until endsym() reports that the character cannot be part
 * of a symbol, and returns that position.  If cp does not start an
 * identifier it is returned unchanged.
 */
static const char *
skipsym(const char *cp)
{
	while (!endsym(*cp))
		++cp;
	return (cp);
}
/* * Look for the symbol in the symbol table. If it is found, we return * the symbol table index, else we return -1.
*/ staticint
findsym(constchar *str)
{ constchar *cp; int symind;
cp = skipsym(str); if (cp == str) return (-1); if (symlist) { if (symdepth && firstsym)
printf("%s%3d", zerosyms ? "" : "\n", depth);
firstsym = zerosyms = false;
printf("%s%.*s%s",
symdepth ? " " : "",
(int)(cp-str), str,
symdepth ? "" : "\n"); /* we don't care about the value of the symbol */ return (0);
} for (symind = 0; symind < nsyms; ++symind) { if (strlcmp(symname[symind], str, cp-str) == 0) {
debug("findsym %s %s", symname[symind],
value[symind] ? value[symind] : ""); return (symind);
}
} return (-1);
}
/* * Add a symbol to the symbol table.
*/ staticvoid
addsym(bool ignorethis, bool definethis, char *sym)
{ int symind; char *val;
/*
 * Compare s with n characters of t.
 * The same as strncmp() except that it checks that s[n] == '\0'.
 *
 * s: NUL-terminated string (for example a keyword or symbol name).
 * t: text to compare against; only its first n characters are examined.
 * Returns 0 if s is exactly equal to the first n characters of t,
 * otherwise a non-zero value: the difference between the first pair of
 * mismatching characters (as unsigned char), or the first unmatched
 * character of s if s is longer than the compared part of t.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	while (n-- && *t != '\0')
		if (*s != *t)
			return ((unsigned char)*s - (unsigned char)*t);
		else
			++s, ++t;
	/* zero only if s ends exactly where the comparison stopped */
	return ((unsigned char)*s);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.