/* sumlines.c - total the numbers appearing in various input lines. */ /* B. D. McKay. Version of March 30, 2017. */
/* GMP: non-zero if gmp multi-precise integers are allowed.  In this case you
   need the GNU multi-precision library, available with -lgmp if it is
   installed.  A definition of GMP made before this point takes precedence
   over the default of 1 given here. */
#ifndef GMP
#define GMP 1
#endif
/* Help text shown to the user.  It is written as adjacent string literals
   (one per output line) so that no line splice can fall inside a literal. */
#define HELPTEXT \
    " Sum lines matching specified formats.\n" \
    "\n" \
    "Any number of input files can be given. \"-\" means stdin.\n" \
    "If there are no files given, just stdin is assumed.\n" \
    "File names can contain wildcards, in which case all matching files\n" \
    "are used in numerically sorted order.\n" \
    "\n" \
    "Formats are read from four sources in this order:\n" \
    "(1) Any files mentioned with -f on the command line (any number).\n" \
    "(2) The file named in the environment variable SUMLINES.FMT (if any)\n" \
    "(3) The file sumlines.fmt in the current directory (if it exists)\n" \
    "(4) The file sumlines.fmt in the home directory (if it exists)\n" \
    "All these are read if they exist and the results concatenated.\n" \
    "Formats exactly matching earlier formats (except perhaps for flags)\n" \
    "are not used.\n" \
    "\n" \
    "Each format occupies exactly two lines. The first line gives a\n" \
    "list of flags (DEFAULT FINAL ERROR UNIQUE COUNT CONTINUE NUMERIC\n" \
    "SILENT ENDFILE P=# separated by spaces, commas or |s).\n" \
    "The second line gives the format itself.\n" \
    "\n" \
    "Example. This totals the summary lines of autoson runs:\n" \
    "DEFAULT# comment \n" \
    "cpu=%fu,%fs,%fx pf=%d\n" \
    "There can also be blank lines and lines with only comments, but\n" \
    "not between the flags line and the format itself.\n" \
    "\n" \
    "-d don't read sumlines.fmt or ~/sumlines.fmt or $SUMLINES.FMT \n" \
    "-w suppresses warning messages about no matching lines or no\n" \
    "matching final lines.\n" \
    "-W in addition, suppresses warning about missing cases.\n" \
    "-n don't write the number of matching lines for each format.\n" \
    "-v produces a list of all the formats.\n"
/* Flags that can be given on the flags line of a format.  The non-zero
   values are distinct powers of two. */
#define DEFAULT    0  /* No special flags */
#define FINAL      1  /* At least one of these must be in each input file */
#define ERROR      2  /* Must be none of these */
#define UNIQUE     4  /* The %s and %c parts must be unique over all inputs */
#define COUNT      8  /* The output only states how many lines matched */
#define CONTINUE  16  /* Try to match later formats too */
#define NUMERIC   32  /* Use numerical comparison (see numstrcmp() below) */
#define SILENT    64  /* Don't report, just check */
#define ENDFILE  128  /* Usually appears at end of output */
/* The formats are tried against each input line one at a time, and the first one that matches is accepted. The entire line must match. If the CONTINUE flag is present, the input line is also matched against further formats.
Except in the case of formats with the COUNT flag, each format that matches any lines produces output giving the total value of each of the integers %d or real numbers %f in the lines which match. If there are any %s or %c controls in the format, the output is given separately for each value of the matching strings which appear in the input lines.
In the case of the COUNT flag, the program only reports the number of input lines which matched the format.
If a format has the UNIQUE flag, no two input lines may match with the same values of the %s and %c controls present. Otherwise a warning message is written for each duplicate match.
The sequence P=# where # is an integer value defines the base for the %p directive. There can be no spaces in the sequence "P=#". The default base is 2.
  %d  - matches an integer (small enough for 64 bits)
  %x  - same as %d but accumulates maximum rather than the sum
  %n  - same as %d but accumulates minimum rather than the sum
  %p  - same as %d but accumulates the value modulo a base
  %m  - matches an integer of unbounded size (if GMP!=0)
  %f  - matches a real number of the form ddddd.ddd or -ddddd.ddd
  %v  - same as %f but reports the average rather than the sum
  %X  - same as %f but reports the maximum rather than the sum
  %h  - similar to %d:%d:%f taken as h:m:s with a single floating value
  %sx - matches a string, where 'x' is any character.
        If 'x' is not a space, match zero or more characters from the
        current position up to but not including the first 'x'.
        If 'x' is a space, match one or more characters from the current
        position up to and including the first non-space character
        which is followed by a space.
  %c  - matches a non-white character
  %%  - matches the character '%'
  %   - (with a space following the '%') matches zero or more spaces or
        tabs, as many as appear in the input.  In the output, this
        sequence appears as one space.
  %   - (appearing exactly at the end of the format) matches zero or
        more spaces at the end of the line.  In the output, nothing.
  %*d, %*m, %*x, %*p, %*f, %*sx, %*c - these are similar to the versions
        without the '*' except that the value is ignored (not used for
        summing, and not used to divide the output).  In the output,
        this field appears as a single character '*'.
  %#  - matches an unsigned integer.  For each format containing this
        control, a report is made of any breaks or duplicates in the
        sequence of matching numbers.  (So this is useful for checking a
        sequence of case numbers.)  At most one %# may appear in each format.
  %l  - matches a list of arbitrarily many (%d sized) integers
At least one FINAL format must match in each file or a warning is given (unless -w is used, in which case no warning is given).
A format marked ENDFILE will cause sumlines to act as if it started reading from a new input file. This can have some effects on the order of output lines.
*/
/* Codes identifying the kind of value held in each slot of a format.
   Note that the value 7 is not used. */
#define D   0  /* Code for "integer" */
#define F   1  /* Code for "real" */
#define M   2  /* Code for "multiprecision integer" */
#define X   3  /* Code for "integer, take maximum" */
#define V   4  /* Code for "real, take average" */
#define P   5  /* Code for "integer, modulo some base" */
#define LD  6  /* Code for "list of integer" */
#define H   8  /* Code for "h:m:s" */
#define FX  9  /* Code for "real, take maximum" */
#define N  10  /* Code for "integer, take minimum" */
/* Maximum input line size (longer lines are broken in bits) */
#define MAXLINELEN 100000

/* Maximum total number of %d,%x,%n,%p,%m,%v,%f,%h or %l items in a format */
#define MAXVALUES 32
static int
numstrcmp(char *s1, char *s2)
/* Compare two strings like strcmp(), except that when an unsigned integer
   is found at the same point in each string, the numerical values are
   compared instead of the ascii values.

   Returns -1, 0 or 1 according to whether s1 sorts before, equal to or
   after s2.

   Overflow is impossible because numbers are never converted: after
   leading zeros are dropped, the longer digit run is the larger number and
   equal-length runs are compared digit by digit.
   Leading spaces before numbers are considered part of the numbers.
   A number in one string is considered less than a non-number in the
   other string.

   Characters are passed to isdigit() as unsigned char, since passing a
   negative plain char is undefined behaviour. */
{
    char *a1,*a2;

    for (;;)
    {
        /* Take the next token of s1.  It is either a run of digits
           (possibly preceded by spaces), delimited by [a1,s1), or the
           single character at a1. */
        for (a1 = s1; *a1 == ' '; ++a1) {}
        if (isdigit((unsigned char)*a1))
        {
            for (s1 = a1+1; isdigit((unsigned char)*s1); ++s1) {}
        }
        else
        {
            /* Spaces not followed by a digit are ordinary characters. */
            a1 = s1;
            ++s1;
        }

        /* The same for s2. */
        for (a2 = s2; *a2 == ' '; ++a2) {}
        if (isdigit((unsigned char)*a2))
        {
            for (s2 = a2+1; isdigit((unsigned char)*s2); ++s2) {}
        }
        else
        {
            a2 = s2;
            ++s2;
        }

        if (!isdigit((unsigned char)*a1))
        {
            /* A non-number sorts after a number. */
            if (isdigit((unsigned char)*a2)) return 1;

            /* Two ordinary characters. */
            if (*a1 < *a2) return -1;
            if (*a1 > *a2) return 1;
            if (*a1 == '\0') return 0;    /* both strings ended together */
        }
        else
        {
            /* A number sorts before a non-number. */
            if (!isdigit((unsigned char)*a2)) return -1;

            /* Two numbers: ignore leading zeros. */
            for (; *a1 == '0'; ++a1) {}
            for (; *a2 == '0'; ++a2) {}

            /* More significant digits means a larger value. */
            if (s1-a1 < s2-a2) return -1;
            if (s1-a1 > s2-a2) return 1;

            /* Same length: the first differing digit decides. */
            for (; a1 < s1 && *a1 == *a2; ++a1, ++a2) {}
            if (a1 < s1)
            {
                if (*a1 < *a2) return -1;
                else           return 1;
            }
        }
    }
}
static void
writeline(char *outf, number *val, unsigned long count)
/* Write an output line with the given format and values.

   outf  - output format.  Ordinary characters are copied to stdout; each
           '%' directive is replaced as described below.
   val   - the values; one is consumed, in order, by each directive other
           than %% and %#.
   count - divisor applied to the value of a %v directive, so that an
           average is printed.

   Directives:
     %% %#        the character itself
     %d %x %n %p  integer, printed with dout
     %f           real, printed with fout
     %v           real divided by count, printed with vout
     %X           real, printed with Xout
     %h           real number of seconds, printed as h:m:s using hmsout1
                  if the seconds part is a whole number, else hmsout2
     %l           list of integers separated by single spaces
     %m           multiprecision integer (only if GMP is non-zero)
   Any other directive is reported on stderr and the program exits with
   status 1. */
{
    int i,n;
    integer mins,nsecs;
    double secs,hms;
    boolean neg;

    n = 0;    /* index of the next unused entry of val[] */

    for (; *outf != '\0'; ++outf)
    {
        if (*outf == '%')
        {
            ++outf;
            if (*outf == '%' || *outf == '#')
                putchar(*outf);
            else if (*outf == 'd' || *outf == 'x'
                       || *outf == 'n' || *outf == 'p')
                printf(dout,val[n++].d);
            else if (*outf == 'f')
                printf(fout,val[n++].f);
            else if (*outf == 'v')
                printf(vout,val[n++].f/count);
            else if (*outf == 'X')
                printf(Xout,val[n++].f);
            else if (*outf == 'h')
            {
                /* Work with the magnitude; the sign is printed separately. */
                if (val[n].f < 0)
                {
                    neg = TRUE;
                    hms = -val[n].f;
                }
                else
                {
                    neg = FALSE;
                    hms = val[n].f;
                }
                mins = hms/60.0;         /* whole minutes */
                secs = hms - 60*mins;    /* remaining seconds */
                nsecs = secs;            /* integer part of the seconds */
                ++n;
                if (neg) printf("-");
                if (secs == nsecs)
                    printf(hmsout1,mins/60,mins%60,nsecs);
                else
                    printf(hmsout2,mins/60,mins%60,secs);
            }
            else if (*outf == 'l')
            {
                for (i = 0; i < val[n].l->nvals; ++i)
                {
                    if (i > 0) printf(" ");
                    printf(dout,val[n].l->val[i]);
                }
                ++n;
            }
#if GMP
            else if (*outf == 'm')
                mpz_out_str(NULL,10,*(val[n++].m));
#endif
            else
            {
                fprintf(stderr,">E unknown output format %%%c\n",*outf);
                exit(1);
            }
        }
        else
            putchar(*outf);
    }
}
staticvoid
print_common(countnode *root) /* Print the common ends of the formats in the tree */
{ int code;
countnode *p; char *s0,*s1,*t0,*t1; int i,comm0,comm1,minlen,maxlen;
while (p)
{ switch (code) /* deliberate flow-ons */
{ case A: if (p->left)
{
p = p->left; break;
} case L:
t0 = t1 = p->fmt; for (i = 0; i < comm0; ++i) if (s0[i] != t0[i]) break;
comm0 = i;
while (*t1 != '\0') ++t1; for (i = 1; i <= comm1; ++i) if (s1[-i] != t1[-i]) break;
comm1 = i-1; if (t1-t0 < minlen) minlen = t1-t0; if (t1-t0 > maxlen) maxlen = t1-t0;
if (p->right)
{
p = p->right;
code = A; break;
} case R: if (p->parent && p->parent->left == p) code = L; else code = R;
p = p->parent; break;
}
}
staticvoid
add_one(countnode **to_root, char *fmt, integer pmod, int nval,
number *val, int *valtype, int which, boolean numcompare) /* Add one match to the node with the given format, creating it if it is new.
The tree is then splayed to ensure good efficiency. */
{ int i,j,cmp;
countnode *p,*ppar,*new_node;
integer w;
staticint
scanline(char *s, char *f, number *val, int *valtype,
integer *seqno, char *outf) /* Perform sscanf-like scan of line. The whole format must match. outf is set to be an output format with unassigned values replaced by '*' and %s replaced by what it matches. Assigned values except %s are put into val[] with their types in valtype[]. The number of values (not counting %#) is returned. Integers matching %# are put into *seqno, with an error if there are more than one, and -1 if there are none. If the format doesn't match, -1 is returned. WARNING: the gmp and ilist values are pointers to static data, so they need to be copied if the values array is copied. See the comments at the start of the program for more information.
*/
{ int n; /* Number of values assigned */ int digit;
boolean doass,neg,oflow,badgmp;
integer ival; double dval,digval,comval; char ends,*saves; static boolean gmp_warning = FALSE;
integer *ilist;
size_t ilist_sz; int nilist; #if GMP char mp_line[MAXLINELEN+1],*mp; #endif
n = 0;
*seqno = -1;
badgmp = oflow = FALSE;
while (*f != '\0')
{ if (*f == '%')
{
++f; if (*f == '*')
{
doass = FALSE;
++f;
} else
doass = TRUE;
static void
sort_formats(int *order, int numformats)
/* Make order[0..numformats-1] a permutation of 0..numformats-1 being
   a good order to display the results.

   Each format i is given a score: -1 if it matched no lines, and otherwise
   i + 100 * (total_position[i] / matching_lines[i]) * numformats.
   order[] is then sorted so that the scores are non-decreasing, using
   a Shell sort with gaps taken from the sequence 1, 4, 13, 40, ... */
{
    double score[MAXFORMATS];
    int h,i,j,iw;

    for (i = 0; i < numformats; ++i)
    {
        if (matching_lines[i] == 0)
            score[i] = -1.0;
        else
            score[i] = i +
               ((100.0*total_position[i]) / matching_lines[i]) * numformats;
        order[i] = i;
    }

    /* Choose the initial gap: the first term of 4, 13, 40, ... that is
       at least numformats/3. */
    j = numformats / 3;
    h = 1;
    do
        h = 3 * h + 1;
    while (h < j);

    /* Gapped insertion sort for each gap, finishing with a gap of 1. */
    do
    {
        for (i = h; i < numformats; ++i)
        {
            iw = order[i];
            for (j = i; score[order[j-h]] > score[iw]; )
            {
                order[j] = order[j-h];
                if ((j -= h) < h) break;    /* no element left at j-h */
            }
            order[j] = iw;
        }
        h /= 3;
    }
    while (h > 0);
}
staticvoid
read_formats(char *filename, int *numformatsp, boolean mustexist) /* Read formats from the given file. */
{
FILE *f; int i,c,flags,ignore; char flagname[52]; char line[MAXLINELEN+3];
integer pmod; char *s;
boolean oflow,badpmod; int digit;
if (strcmp(filename,"-") == 0)
f = stdin; elseif ((f = fopen(filename,"r")) == NULL)
{ if (mustexist)
{
fprintf(stderr,">E Can't open %s for reading.\n",filename); exit(1);
} return;
}
line[MAXLINELEN+2] = '\0';
for (;;)
{ if ((c = getc(f)) == EOF) break;
while (c == ' ' || c == '\t') c = getc(f); if (c == '\n') continue; if (c == EOF) break;
if (c == '#')
{ while (c != '\n' && c != EOF) c = getc(f); continue;
}
ungetc(c,f);
flags = 0;
pmod = 2; for (;;)
{ while ((c = getc(f)) == ' '
|| c == '|' || c == ',' || c == '\t') {} if (c == '#') while (c != '\n' && c != EOF) c = getc(f); if (c == '\n' || c == EOF) break;
ungetc(c,f);
/* There appear to be some issues with the [ flag in fscanf,
* as to whether a null is appended. We'll take no chances. */ for (i = 0; i < 52; ++i) flagname[i] = '\0';
ignore = fscanf(f,"%50[A-Za-z0-9=]",flagname);
static void
read_local_formats(int *numformatsp)
/* Read formats from sumlines.fmt in current directory.
   read_formats() is called with mustexist = FALSE, so it is not an error
   if the file cannot be opened.  numformatsp is passed through unchanged. */
{
    read_formats("sumlines.fmt",numformatsp,FALSE);
}
static boolean
readoneline(FILE *f, char *line, int size, int *nulls) /* Get a line. Read at most size-1 chars until EOF or \n. If \n is read, it is stored. Then \0 is appended.
*nulls is set to the number of NUL chars (which are also stored). */
{ int i,c;
*nulls = 0; for (i = 0; i < size-1; ++i)
{
c = getc(f); if (c == EOF) break;
line[i] = c; if (c == '\0') ++*nulls; if (c == '\n') {++i; break;}
}
line[i] = '\0';
static void
doglob(char *patt, glob_t *globlk)
/* Find all files matching the given pattern, numeric sorting.
   Give a warning message if there are none.

   patt   - the pattern handed to glob().
   globlk - receives the result of glob(); gl_pathc is forced to 0
            whenever glob() does not return 0.

   If glob() runs out of memory, is aborted by a read error, or returns
   any other value except 0 and GLOB_NOMATCH, a message is written to
   stderr and the program exits with status 1.  When two or more paths
   are found they are sorted with qsort() using pnumstrcmp (defined
   elsewhere in this file).

   The read-error case is recognised by the return value GLOB_ABORTED;
   GLOB_ERR is a flag for the second argument of glob(), not a return
   value. */
{
    int ret;

    ret = glob(patt,GLOB_FLAGS,NULL,globlk);

    if (ret != 0) globlk->gl_pathc = 0;

    if (ret == GLOB_NOSPACE)
    {
        fprintf(stderr,"ERROR: ran out of space during glob()\n");
        exit(1);
    }
    if (ret == GLOB_ABORTED)
    {
        fprintf(stderr,"ERROR: during glob(%s)\n",patt);
        exit(1);
    }
    if (ret != 0 && ret != GLOB_NOMATCH)
    {
        fprintf(stderr,"ERROR: value %d from glob(%s)\n",ret,patt);
        exit(1);
    }

    if (globlk->gl_pathc == 0) printf("WARNING: no files match %s\n",patt);

    if (globlk->gl_pathc >= 2)
        qsort(globlk->gl_pathv,globlk->gl_pathc,sizeof(char*),pnumstrcmp);
}
for (i = 0; i < numformats; ++i)
{
nvals
= scanline(line,format[i].fmt,val,valtype,&seq,outf); if (nvals >= 0)
{ if (HAS(i,ENDFILE)) line_number = 0;
++matched; if (HAS(i,FINAL)) ++finalmatched; if (HAS(i,ERROR)) ++errorlines;
++matching_lines[i];
total_position[i] += line_number;
add_one(&count_root[i],outf,format[i].pmod,nvals,
val,valtype,i,HAS(i,NUMERIC)); if (!noWarn && matching_lines[i] > 1 && seq >= 0
&& seq != lastseq[i]+1)
{
printf("WARNING: Sequence number"); if (seq == lastseq[i])
{
printf(" ");
printf(dout,seq);
printf(" is repeated.\n");
} elseif (seq != lastseq[i]+2)
{
printf("s ");
printf(dout,lastseq[i]+1);
printf("-");
printf(dout,seq-1);
printf(" are missing.\n");
} else
{
printf(" ");
printf(dout,seq-1);
printf(" is missing.\n");
}
}
lastseq[i] = seq; if (!HAS(i,CONTINUE)) break;
}
}
if (i == numformats) ++unmatched;
} if (errorlines != 0)
printf("ERRORS: Error lines in file %s\n",filename); elseif (matched == 0 && !nowarn)
printf("WARNING: No matching lines in file %s\n",filename); elseif (finalmatched == 0 && havefinal && !nowarn)
printf("WARNING: No final lines in file %s\n",filename); if (nullcount > 0)
printf("WARNING: %ld NULs found in file %s\n",
nullcount,filename); if (infile != stdin) fclose(infile);
totalerrorlines += errorlines;
} if (pglob == &globlk) globfree(pglob);
}
sort_formats(order,numformats);
for (j = 0; j < numformats; ++j)
{
i = order[j]; if (HAS(i,SILENT)) continue;
if (HAS(i,COUNT))
{ if (matching_lines[i] > 0)
printf("%5lu lines matched ",matching_lines[i]);
print_common(count_root[i]);
} else
print_counts(count_root[i],printcounts);
}
if (unmatched > 0)
printf("%5lu non-empty lines not matched\n",unmatched); if (argc > firstarg) printf("%5lu files read altogether\n",numfiles); if (totalerrorlines > 0) printf("%5lu errors found\n",totalerrorlines);
exit(0);
}
/*
 * NOTE: the text below is not part of sumlines.c.  It is footer text from
 * the web page this listing was taken from, kept here only as a comment
 * (translated from German).
 *
 * The company offers the following two groups of services.
 * How to commission consulting and other services from the company.
 *
 * The information on this web page has been compiled carefully and to the
 * best of our knowledge.  However, no guarantee is given as to the
 * completeness, correctness, or quality of the information provided.
 * Remark:
 * The syntax colouring is still experimental.
 */