/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <stdio.h> #include <stdlib.h> #include <string.h> #if (defined(_WIN32) || defined(__IBMC__)) #include <io.h> #else #include <unistd.h> #endif #include"cpp.h" /* * lexical FSM encoding * when in state state, and one of the characters * in ch arrives, enter nextstate. * States >= S_SELF are either final, or at least require special action. * In 'fsm' there is a line for each state X charset X nextstate. * List chars that overwrite previous entries later (e.g. C_ALPH * can be overridden by '_' by a later entry; and C_XX is the * universal set, and should always be first. * States above S_SELF are represented in the big table as negative values. * S_SELF and S_SELFB encode the resulting token type in the upper bits. * These actions differ in that S_SELF doesn't have a lookahead char, * S_SELFB does. * * The encoding is blown out into a big table for time-efficiency. * Entries have * nextstate: 6 bits; ?\ marker: 1 bit; tokentype: 9 bits.
*/
/*
 * Dense transition table filled in by expandlex().
 * The first index is the input character, the second is the current state.
 * Increase the number of states (MAXSTATE) to a power of 2 to encourage the
 * use of a shift when indexing.
 */
static short bigfsm[256][MAXSTATE];
void
expandlex(void)
{ conststruct fsm *fp; int i, j, nstate;
for (fp = fsm; fp->state >= 0; fp++)
{ for (i = 0; fp->ch[i]; i++)
{
nstate = fp->nextstate; if (nstate >= S_SELF)
nstate = ~nstate; switch (fp->ch[i])
{
case C_XX: /* random characters */ for (j = 0; j < 256; j++)
bigfsm[j][fp->state] = (short) nstate; continue; case C_ALPH: for (j = 0; j < 256; j++) if (('a' <= j && j <= 'z') || ('A' <= j && j <= 'Z')
|| j == '_')
bigfsm[j][fp->state] = (short) nstate; continue; case C_NUM: for (j = '0'; j <= '9'; j++)
bigfsm[j][fp->state] = (short) nstate; continue; default:
bigfsm[fp->ch[i]][fp->state] = (short) nstate;
}
}
}
/* * install special cases for ? (trigraphs), \ (splicing), runes, and * EOB
*/ for (i = 0; i < MAXSTATE; i++)
{ for (j = 0; j < 0xFF; j++) if (j == '?' || j == '\\' || j == '\n' || j == '\r')
{ if (bigfsm[j][i] > 0)
bigfsm[j][i] = ~bigfsm[j][i];
bigfsm[j][i] &= ~QBSBIT;
}
bigfsm[EOB][i] = ~S_EOB; if (bigfsm[EOFC][i] >= 0)
bigfsm[EOFC][i] = ~S_EOF;
}
}
/*
 * Patch bigfsm according to the Cplusplus and Cflag settings.
 *
 * Unless Cplusplus is non-zero and Cflag is zero, a '/' seen in state COM1
 * is given the same transition as 'x' in that state -- presumably so that
 * "//" does not open a comment; confirm against the fsm table.
 */
void
fixlex(void)
{
    /* do C++ comments? */
    if (Cplusplus != 0 && Cflag == 0)
        return;                 /* yes: leave the table as built */

    bigfsm['/'][COM1] = bigfsm['x'][COM1];
}
/* * fill in a row of tokens from input, terminated by NL or END * First token is put at trp->lp. * Reset is non-zero when the input buffer can be "rewound." * The value is a flag indicating that possible macros have * been seen in the row.
*/ int
gettokens(Tokenrow * trp, int reset)
{ int c, state, oldstate;
uchar *ip;
Token *tp, *maxp; int runelen;
Source *s = cursource; int nmac = 0;
tp = trp->lp;
ip = s->inp; if (reset)
{
s->lineinc = 0; if (ip >= s->inl)
{ /* nothing in buffer */
s->inl = s->inb;
fillbuf(s);
ip = s->inp = s->inb;
} else if (ip >= s->inb + (3 * INS / 4))
{
memmove(s->inb, ip, 4 + s->inl - ip);
s->inl = s->inb + (s->inl - ip);
ip = s->inp = s->inb;
}
}
maxp = &trp->bp[trp->max];
runelen = 1; for (;;)
{
continue2: if (tp >= maxp)
{
trp->lp = tp;
tp = growtokenrow(trp); // coverity[overrun-local : FALSE] - a multiple of trp->max is allocated, not trp->max itself
maxp = &trp->bp[trp->max];
}
tp->type = UNCLASS;
tp->t = ip;
tp->wslen = 0;
state = START; for (;;)
{
oldstate = state;
c = *ip;
if ((state = bigfsm[c][state]) >= 0)
{
ip += runelen;
runelen = 1; continue;
}
state = ~state;
reswitch: switch (state & 0177)
{ case S_SELF:
ip += runelen;
runelen = 1; /*fall-through*/ case S_SELFB:
tp->type = (unsignedchar) GETACT(state);
tp->len = ip - tp->t;
tp++; goto continue2;
case S_NAME: /* like S_SELFB but with nmac check */
tp->type = NAME;
tp->len = ip - tp->t;
nmac |= quicklook(tp->t[0], tp->len > 1 ? tp->t[1] : 0);
tp++; goto continue2;
case S_WS:
tp->wslen = ip - tp->t;
tp->t = ip;
state = START; continue;
default: if ((state & QBSBIT) == 0)
{
ip += runelen;
runelen = 1; continue;
}
state &= ~QBSBIT;
s->inp = ip;
if (c == '\n')
{ while (s->inp + 1 >= s->inl && fillbuf(s) != EOF);
/* skip DOS line ends */ if (((s->inp[n] == '\r') && (s->inp[n+1] == '\n')) ||
((s->inp[n] == '\n') && (s->inp[n+1] == '\r')))
n++;
if ((s->inp[n] == '\n') || (s->inp[n] == '\r'))
{
memmove(s->inp, s->inp + n + 1, s->inl - s->inp + n + 2);
s->inl -= n + 1; return 1;
} return 0;
}
/*
 * Append more input to the buffer of source `s'.
 *
 * When s->fd is non-negative, up to INS / 8 bytes are read to s->inl and
 * s->inl is advanced past them; a failed read counts as zero bytes.  The four
 * bytes at the new s->inl are then set to a sentinel: EOB when data arrived,
 * EOFC when nothing was read (including the case of a negative fd).
 *
 * Returns 0 when data was added, EOF otherwise.
 */
int
fillbuf(Source * s)
{
    int got = 0;
    int sentinel;

    if (s->fd >= 0)
    {
        got = read(s->fd, (char *) s->inl, INS / 8);
        if (got < 0)
            got = 0;            /* treat a read error like end of input */
    }

    s->inl += got;

    /* mark the end of valid data with four identical sentinel bytes */
    sentinel = (got == 0) ? EOFC : EOB;
    memset(s->inl, sentinel, 4);

    return (got == 0) ? EOF : 0;
}
/* * Push down to new source of characters. * If fd>0 and str==NULL, then from a file `name'; * if fd==-1 and str, then from the string.
*/
Source *
setsource(char *name, int path, int fd, charconst *str, int wrap)
{
Source *s = new(Source);
size_t len;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.