Annotation of rpl/rplawk/b.c, revision 1.1
1.1 ! bertrand 1: /****************************************************************
! 2: Copyright (C) Lucent Technologies 1997
! 3: All Rights Reserved
! 4:
! 5: Permission to use, copy, modify, and distribute this software and
! 6: its documentation for any purpose and without fee is hereby
! 7: granted, provided that the above copyright notice appear in all
! 8: copies and that both that the copyright notice and this
! 9: permission notice and warranty disclaimer appear in supporting
! 10: documentation, and that the name Lucent Technologies or any of
! 11: its entities not be used in advertising or publicity pertaining
! 12: to distribution of the software without specific, written prior
! 13: permission.
! 14:
! 15: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
! 16: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
! 17: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
! 18: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 19: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
! 20: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
! 21: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
! 22: THIS SOFTWARE.
! 23: ****************************************************************/
! 24:
! 25: /* lasciate ogne speranza, voi ch'intrate. */
! 26:
! 27: #define DEBUG
! 28:
! 29: #include <ctype.h>
! 30: #include <stdio.h>
! 31: #include <string.h>
! 32: #include <stdlib.h>
! 33: #include "awk.h"
! 34: #include "ytab.h"
! 35:
! 36: #define HAT (NCHARS+2) /* matches ^ in regular expr */
! 37: /* NCHARS is 2**n */
! 38: #define MAXLIN 22
! 39:
! 40: #define type(v) (v)->nobj /* badly overloaded here */
! 41: #define info(v) (v)->ntype /* badly overloaded here */
! 42: #define left(v) (v)->narg[0]
! 43: #define right(v) (v)->narg[1]
! 44: #define parent(v) (v)->nnext
! 45:
! 46: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
! 47: #define ELEAF case EMPTYRE: /* empty string in regexp */
! 48: #define UNARY case STAR: case PLUS: case QUEST:
! 49:
! 50: /* encoding in tree Nodes:
! 51: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
! 52: left is index, right contains value or pointer to value
! 53: unary (STAR, PLUS, QUEST): left is child, right is null
! 54: binary (CAT, OR): left and right are children
! 55: parent contains pointer to parent
! 56: */
! 57:
! 58:
! 59: int *setvec;
! 60: int *tmpset;
! 61: int maxsetvec = 0;
! 62:
! 63: int rtok; /* next token in current re */
! 64: int rlxval;
! 65: static uschar *rlxstr;
! 66: static uschar *prestr; /* current position in current re */
! 67: static uschar *lastre; /* origin of last re */
! 68:
! 69: static int setcnt;
! 70: static int poscnt;
! 71:
! 72: char *patbeg;
! 73: int patlen;
! 74:
! 75: #define NFA 20 /* cache this many dynamic fa's */
! 76: fa *fatab[NFA];
! 77: int nfatab = 0; /* entries in fatab */
! 78:
! 79: fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
! 80: {
! 81: int i, use, nuse;
! 82: fa *pfa;
! 83: static int now = 1;
! 84:
! 85: if (setvec == 0) { /* first time through any RE */
! 86: maxsetvec = MAXLIN;
! 87: setvec = (int *) malloc(maxsetvec * sizeof(int));
! 88: tmpset = (int *) malloc(maxsetvec * sizeof(int));
! 89: if (setvec == 0 || tmpset == 0)
! 90: overflo("out of space initializing makedfa");
! 91: }
! 92:
! 93: if (compile_time) /* a constant for sure */
! 94: return mkdfa(s, anchor);
! 95: for (i = 0; i < nfatab; i++) /* is it there already? */
! 96: if (fatab[i]->anchor == anchor
! 97: && strcmp((const char *) fatab[i]->restr, s) == 0) {
! 98: fatab[i]->use = now++;
! 99: return fatab[i];
! 100: }
! 101: pfa = mkdfa(s, anchor);
! 102: if (nfatab < NFA) { /* room for another */
! 103: fatab[nfatab] = pfa;
! 104: fatab[nfatab]->use = now++;
! 105: nfatab++;
! 106: return pfa;
! 107: }
! 108: use = fatab[0]->use; /* replace least-recently used */
! 109: nuse = 0;
! 110: for (i = 1; i < nfatab; i++)
! 111: if (fatab[i]->use < use) {
! 112: use = fatab[i]->use;
! 113: nuse = i;
! 114: }
! 115: freefa(fatab[nuse]);
! 116: fatab[nuse] = pfa;
! 117: pfa->use = now++;
! 118: return pfa;
! 119: }
! 120:
! 121: fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
! 122: /* anchor = 1 for anchored matches, else 0 */
! 123: {
! 124: Node *p, *p1;
! 125: fa *f;
! 126:
! 127: p = reparse(s);
! 128: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
! 129: /* put ALL STAR in front of reg. exp. */
! 130: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
! 131: /* put FINAL after reg. exp. */
! 132:
! 133: poscnt = 0;
! 134: penter(p1); /* enter parent pointers and leaf indices */
! 135: if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
! 136: overflo("out of space for fa");
! 137: f->accept = poscnt-1; /* penter has computed number of positions in re */
! 138: cfoll(f, p1); /* set up follow sets */
! 139: freetr(p1);
! 140: if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
! 141: overflo("out of space in makedfa");
! 142: if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
! 143: overflo("out of space in makedfa");
! 144: *f->posns[1] = 0;
! 145: f->initstat = makeinit(f, anchor);
! 146: f->anchor = anchor;
! 147: f->restr = (uschar *) tostring(s);
! 148: return f;
! 149: }
! 150:
! 151: int makeinit(fa *f, int anchor)
! 152: {
! 153: int i, k;
! 154:
! 155: f->curstat = 2;
! 156: f->out[2] = 0;
! 157: f->reset = 0;
! 158: k = *(f->re[0].lfollow);
! 159: xfree(f->posns[2]);
! 160: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
! 161: overflo("out of space in makeinit");
! 162: for (i=0; i <= k; i++) {
! 163: (f->posns[2])[i] = (f->re[0].lfollow)[i];
! 164: }
! 165: if ((f->posns[2])[1] == f->accept)
! 166: f->out[2] = 1;
! 167: for (i=0; i < NCHARS; i++)
! 168: f->gototab[2][i] = 0;
! 169: f->curstat = cgoto(f, 2, HAT);
! 170: if (anchor) {
! 171: *f->posns[2] = k-1; /* leave out position 0 */
! 172: for (i=0; i < k; i++) {
! 173: (f->posns[0])[i] = (f->posns[2])[i];
! 174: }
! 175:
! 176: f->out[0] = f->out[2];
! 177: if (f->curstat != 2)
! 178: --(*f->posns[f->curstat]);
! 179: }
! 180: return f->curstat;
! 181: }
! 182:
! 183: void penter(Node *p) /* set up parent pointers and leaf indices */
! 184: {
! 185: switch (type(p)) {
! 186: ELEAF
! 187: LEAF
! 188: info(p) = poscnt;
! 189: poscnt++;
! 190: break;
! 191: UNARY
! 192: penter(left(p));
! 193: parent(left(p)) = p;
! 194: break;
! 195: case CAT:
! 196: case OR:
! 197: penter(left(p));
! 198: penter(right(p));
! 199: parent(left(p)) = p;
! 200: parent(right(p)) = p;
! 201: break;
! 202: default: /* can't happen */
! 203: FATAL("can't happen: unknown type %d in penter", type(p));
! 204: break;
! 205: }
! 206: }
! 207:
! 208: void freetr(Node *p) /* free parse tree */
! 209: {
! 210: switch (type(p)) {
! 211: ELEAF
! 212: LEAF
! 213: xfree(p);
! 214: break;
! 215: UNARY
! 216: freetr(left(p));
! 217: xfree(p);
! 218: break;
! 219: case CAT:
! 220: case OR:
! 221: freetr(left(p));
! 222: freetr(right(p));
! 223: xfree(p);
! 224: break;
! 225: default: /* can't happen */
! 226: FATAL("can't happen: unknown type %d in freetr", type(p));
! 227: break;
! 228: }
! 229: }
! 230:
! 231: /* in the parsing of regular expressions, metacharacters like . have */
! 232: /* to be seen literally; \056 is not a metacharacter. */
! 233:
! 234: int hexstr(char **pp) /* find and eval hex string at pp, return new p */
! 235: { /* only pick up one 8-bit byte (2 chars) */
! 236: uschar *p;
! 237: int n = 0;
! 238: int i;
! 239:
! 240: for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
! 241: if (isdigit(*p))
! 242: n = 16 * n + *p - '0';
! 243: else if (*p >= 'a' && *p <= 'f')
! 244: n = 16 * n + *p - 'a' + 10;
! 245: else if (*p >= 'A' && *p <= 'F')
! 246: n = 16 * n + *p - 'A' + 10;
! 247: }
! 248: *pp = (char *) p;
! 249: return n;
! 250: }
! 251:
! 252: #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
! 253:
! 254: int quoted(char **pp) /* pick up next thing after a \\ */
! 255: /* and increment *pp */
! 256: {
! 257: char *p = *pp;
! 258: int c;
! 259:
! 260: if ((c = *p++) == 't')
! 261: c = '\t';
! 262: else if (c == 'n')
! 263: c = '\n';
! 264: else if (c == 'f')
! 265: c = '\f';
! 266: else if (c == 'r')
! 267: c = '\r';
! 268: else if (c == 'b')
! 269: c = '\b';
! 270: else if (c == '\\')
! 271: c = '\\';
! 272: else if (c == 'x') { /* hexadecimal goo follows */
! 273: c = hexstr(&p); /* this adds a null if number is invalid */
! 274: } else if (isoctdigit(c)) { /* \d \dd \ddd */
! 275: int n = c - '0';
! 276: if (isoctdigit(*p)) {
! 277: n = 8 * n + *p++ - '0';
! 278: if (isoctdigit(*p))
! 279: n = 8 * n + *p++ - '0';
! 280: }
! 281: c = n;
! 282: } /* else */
! 283: /* c = c; */
! 284: *pp = p;
! 285: return c;
! 286: }
! 287:
! 288: char *cclenter(const char *argp) /* add a character class */
! 289: {
! 290: int i, c, c2;
! 291: uschar *p = (uschar *) argp;
! 292: uschar *op, *bp;
! 293: static uschar *buf = 0;
! 294: static int bufsz = 100;
! 295:
! 296: op = p;
! 297: if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
! 298: FATAL("out of space for character class [%.10s...] 1", p);
! 299: bp = buf;
! 300: for (i = 0; (c = *p++) != 0; ) {
! 301: if (c == '\\') {
! 302: c = quoted((char **) &p);
! 303: } else if (c == '-' && i > 0 && bp[-1] != 0) {
! 304: if (*p != 0) {
! 305: c = bp[-1];
! 306: c2 = *p++;
! 307: if (c2 == '\\')
! 308: c2 = quoted((char **) &p);
! 309: if (c > c2) { /* empty; ignore */
! 310: bp--;
! 311: i--;
! 312: continue;
! 313: }
! 314: while (c < c2) {
! 315: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
! 316: FATAL("out of space for character class [%.10s...] 2", p);
! 317: *bp++ = ++c;
! 318: i++;
! 319: }
! 320: continue;
! 321: }
! 322: }
! 323: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
! 324: FATAL("out of space for character class [%.10s...] 3", p);
! 325: *bp++ = c;
! 326: i++;
! 327: }
! 328: *bp = 0;
! 329: dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
! 330: xfree(op);
! 331: return (char *) tostring((char *) buf);
! 332: }
! 333:
! 334: void overflo(const char *s)
! 335: {
! 336: FATAL("regular expression too big: %.30s...", s);
! 337: }
! 338:
! 339: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
! 340: {
! 341: int i;
! 342: int *p;
! 343:
! 344: switch (type(v)) {
! 345: ELEAF
! 346: LEAF
! 347: f->re[info(v)].ltype = type(v);
! 348: f->re[info(v)].lval.np = right(v);
! 349: while (f->accept >= maxsetvec) { /* guessing here! */
! 350: maxsetvec *= 4;
! 351: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
! 352: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
! 353: if (setvec == 0 || tmpset == 0)
! 354: overflo("out of space in cfoll()");
! 355: }
! 356: for (i = 0; i <= f->accept; i++)
! 357: setvec[i] = 0;
! 358: setcnt = 0;
! 359: follow(v); /* computes setvec and setcnt */
! 360: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
! 361: overflo("out of space building follow set");
! 362: f->re[info(v)].lfollow = p;
! 363: *p = setcnt;
! 364: for (i = f->accept; i >= 0; i--)
! 365: if (setvec[i] == 1)
! 366: *++p = i;
! 367: break;
! 368: UNARY
! 369: cfoll(f,left(v));
! 370: break;
! 371: case CAT:
! 372: case OR:
! 373: cfoll(f,left(v));
! 374: cfoll(f,right(v));
! 375: break;
! 376: default: /* can't happen */
! 377: FATAL("can't happen: unknown type %d in cfoll", type(v));
! 378: }
! 379: }
! 380:
! 381: int first(Node *p) /* collects initially active leaves of p into setvec */
! 382: /* returns 0 if p matches empty string */
! 383: {
! 384: int b, lp;
! 385:
! 386: switch (type(p)) {
! 387: ELEAF
! 388: LEAF
! 389: lp = info(p); /* look for high-water mark of subscripts */
! 390: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
! 391: maxsetvec *= 4;
! 392: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
! 393: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
! 394: if (setvec == 0 || tmpset == 0)
! 395: overflo("out of space in first()");
! 396: }
! 397: if (type(p) == EMPTYRE) {
! 398: setvec[lp] = 0;
! 399: return(0);
! 400: }
! 401: if (setvec[lp] != 1) {
! 402: setvec[lp] = 1;
! 403: setcnt++;
! 404: }
! 405: if (type(p) == CCL && (*(char *) right(p)) == '\0')
! 406: return(0); /* empty CCL */
! 407: else return(1);
! 408: case PLUS:
! 409: if (first(left(p)) == 0) return(0);
! 410: return(1);
! 411: case STAR:
! 412: case QUEST:
! 413: first(left(p));
! 414: return(0);
! 415: case CAT:
! 416: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
! 417: return(1);
! 418: case OR:
! 419: b = first(right(p));
! 420: if (first(left(p)) == 0 || b == 0) return(0);
! 421: return(1);
! 422: }
! 423: FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
! 424: return(-1);
! 425: }
! 426:
! 427: void follow(Node *v) /* collects leaves that can follow v into setvec */
! 428: {
! 429: Node *p;
! 430:
! 431: if (type(v) == FINAL)
! 432: return;
! 433: p = parent(v);
! 434: switch (type(p)) {
! 435: case STAR:
! 436: case PLUS:
! 437: first(v);
! 438: follow(p);
! 439: return;
! 440:
! 441: case OR:
! 442: case QUEST:
! 443: follow(p);
! 444: return;
! 445:
! 446: case CAT:
! 447: if (v == left(p)) { /* v is left child of p */
! 448: if (first(right(p)) == 0) {
! 449: follow(p);
! 450: return;
! 451: }
! 452: } else /* v is right child */
! 453: follow(p);
! 454: return;
! 455: }
! 456: }
! 457:
! 458: int member(int c, const char *sarg) /* is c in s? */
! 459: {
! 460: uschar *s = (uschar *) sarg;
! 461:
! 462: while (*s)
! 463: if (c == *s++)
! 464: return(1);
! 465: return(0);
! 466: }
! 467:
! 468: int match(fa *f, const char *p0) /* shortest match ? */
! 469: {
! 470: int s, ns;
! 471: uschar *p = (uschar *) p0;
! 472:
! 473: s = f->reset ? makeinit(f,0) : f->initstat;
! 474: if (f->out[s])
! 475: return(1);
! 476: do {
! 477: /* assert(*p < NCHARS); */
! 478: if ((ns = f->gototab[s][*p]) != 0)
! 479: s = ns;
! 480: else
! 481: s = cgoto(f, s, *p);
! 482: if (f->out[s])
! 483: return(1);
! 484: } while (*p++ != 0);
! 485: return(0);
! 486: }
! 487:
! 488: int pmatch(fa *f, const char *p0) /* longest match, for sub */
! 489: {
! 490: int s, ns;
! 491: uschar *p = (uschar *) p0;
! 492: uschar *q;
! 493: int i, k;
! 494:
! 495: /* s = f->reset ? makeinit(f,1) : f->initstat; */
! 496: if (f->reset) {
! 497: f->initstat = s = makeinit(f,1);
! 498: } else {
! 499: s = f->initstat;
! 500: }
! 501: patbeg = (char *) p;
! 502: patlen = -1;
! 503: do {
! 504: q = p;
! 505: do {
! 506: if (f->out[s]) /* final state */
! 507: patlen = q-p;
! 508: /* assert(*q < NCHARS); */
! 509: if ((ns = f->gototab[s][*q]) != 0)
! 510: s = ns;
! 511: else
! 512: s = cgoto(f, s, *q);
! 513: if (s == 1) { /* no transition */
! 514: if (patlen >= 0) {
! 515: patbeg = (char *) p;
! 516: return(1);
! 517: }
! 518: else
! 519: goto nextin; /* no match */
! 520: }
! 521: } while (*q++ != 0);
! 522: if (f->out[s])
! 523: patlen = q-p-1; /* don't count $ */
! 524: if (patlen >= 0) {
! 525: patbeg = (char *) p;
! 526: return(1);
! 527: }
! 528: nextin:
! 529: s = 2;
! 530: if (f->reset) {
! 531: for (i = 2; i <= f->curstat; i++)
! 532: xfree(f->posns[i]);
! 533: k = *f->posns[0];
! 534: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
! 535: overflo("out of space in pmatch");
! 536: for (i = 0; i <= k; i++)
! 537: (f->posns[2])[i] = (f->posns[0])[i];
! 538: f->initstat = f->curstat = 2;
! 539: f->out[2] = f->out[0];
! 540: for (i = 0; i < NCHARS; i++)
! 541: f->gototab[2][i] = 0;
! 542: }
! 543: } while (*p++ != 0);
! 544: return (0);
! 545: }
! 546:
! 547: int nematch(fa *f, const char *p0) /* non-empty match, for sub */
! 548: {
! 549: int s, ns;
! 550: uschar *p = (uschar *) p0;
! 551: uschar *q;
! 552: int i, k;
! 553:
! 554: /* s = f->reset ? makeinit(f,1) : f->initstat; */
! 555: if (f->reset) {
! 556: f->initstat = s = makeinit(f,1);
! 557: } else {
! 558: s = f->initstat;
! 559: }
! 560: patlen = -1;
! 561: while (*p) {
! 562: q = p;
! 563: do {
! 564: if (f->out[s]) /* final state */
! 565: patlen = q-p;
! 566: /* assert(*q < NCHARS); */
! 567: if ((ns = f->gototab[s][*q]) != 0)
! 568: s = ns;
! 569: else
! 570: s = cgoto(f, s, *q);
! 571: if (s == 1) { /* no transition */
! 572: if (patlen > 0) {
! 573: patbeg = (char *) p;
! 574: return(1);
! 575: } else
! 576: goto nnextin; /* no nonempty match */
! 577: }
! 578: } while (*q++ != 0);
! 579: if (f->out[s])
! 580: patlen = q-p-1; /* don't count $ */
! 581: if (patlen > 0 ) {
! 582: patbeg = (char *) p;
! 583: return(1);
! 584: }
! 585: nnextin:
! 586: s = 2;
! 587: if (f->reset) {
! 588: for (i = 2; i <= f->curstat; i++)
! 589: xfree(f->posns[i]);
! 590: k = *f->posns[0];
! 591: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
! 592: overflo("out of state space");
! 593: for (i = 0; i <= k; i++)
! 594: (f->posns[2])[i] = (f->posns[0])[i];
! 595: f->initstat = f->curstat = 2;
! 596: f->out[2] = f->out[0];
! 597: for (i = 0; i < NCHARS; i++)
! 598: f->gototab[2][i] = 0;
! 599: }
! 600: p++;
! 601: }
! 602: return (0);
! 603: }
! 604:
! 605: Node *reparse(const char *p) /* parses regular expression pointed to by p */
! 606: { /* uses relex() to scan regular expression */
! 607: Node *np;
! 608:
! 609: dprintf( ("reparse <%s>\n", p) );
! 610: lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
! 611: rtok = relex();
! 612: /* GNU compatibility: an empty regexp matches anything */
! 613: if (rtok == '\0') {
! 614: /* FATAL("empty regular expression"); previous */
! 615: return(op2(EMPTYRE, NIL, NIL));
! 616: }
! 617: np = regexp();
! 618: if (rtok != '\0')
! 619: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
! 620: return(np);
! 621: }
! 622:
! 623: Node *regexp(void) /* top-level parse of reg expr */
! 624: {
! 625: return (alt(concat(primary())));
! 626: }
! 627:
! 628: Node *primary(void)
! 629: {
! 630: Node *np;
! 631:
! 632: switch (rtok) {
! 633: case CHAR:
! 634: np = op2(CHAR, NIL, itonp(rlxval));
! 635: rtok = relex();
! 636: return (unary(np));
! 637: case ALL:
! 638: rtok = relex();
! 639: return (unary(op2(ALL, NIL, NIL)));
! 640: case EMPTYRE:
! 641: rtok = relex();
! 642: return (unary(op2(ALL, NIL, NIL)));
! 643: case DOT:
! 644: rtok = relex();
! 645: return (unary(op2(DOT, NIL, NIL)));
! 646: case CCL:
! 647: np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
! 648: rtok = relex();
! 649: return (unary(np));
! 650: case NCCL:
! 651: np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
! 652: rtok = relex();
! 653: return (unary(np));
! 654: case '^':
! 655: rtok = relex();
! 656: return (unary(op2(CHAR, NIL, itonp(HAT))));
! 657: case '$':
! 658: rtok = relex();
! 659: return (unary(op2(CHAR, NIL, NIL)));
! 660: case '(':
! 661: rtok = relex();
! 662: if (rtok == ')') { /* special pleading for () */
! 663: rtok = relex();
! 664: return unary(op2(CCL, NIL, (Node *) tostring("")));
! 665: }
! 666: np = regexp();
! 667: if (rtok == ')') {
! 668: rtok = relex();
! 669: return (unary(np));
! 670: }
! 671: else
! 672: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
! 673: default:
! 674: FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
! 675: }
! 676: return 0; /*NOTREACHED*/
! 677: }
! 678:
! 679: Node *concat(Node *np)
! 680: {
! 681: switch (rtok) {
! 682: case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
! 683: return (concat(op2(CAT, np, primary())));
! 684: }
! 685: return (np);
! 686: }
! 687:
! 688: Node *alt(Node *np)
! 689: {
! 690: if (rtok == OR) {
! 691: rtok = relex();
! 692: return (alt(op2(OR, np, concat(primary()))));
! 693: }
! 694: return (np);
! 695: }
! 696:
! 697: Node *unary(Node *np)
! 698: {
! 699: switch (rtok) {
! 700: case STAR:
! 701: rtok = relex();
! 702: return (unary(op2(STAR, np, NIL)));
! 703: case PLUS:
! 704: rtok = relex();
! 705: return (unary(op2(PLUS, np, NIL)));
! 706: case QUEST:
! 707: rtok = relex();
! 708: return (unary(op2(QUEST, np, NIL)));
! 709: default:
! 710: return (np);
! 711: }
! 712: }
! 713:
! 714: /*
! 715: * Character class definitions conformant to the POSIX locale as
! 716: * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
! 717: * and operating character sets are both ASCII (ISO646) or supersets
! 718: * thereof.
! 719: *
! 720: * Note that to avoid overflowing the temporary buffer used in
! 721: * relex(), the expanded character class (prior to range expansion)
! 722: * must be less than twice the size of their full name.
! 723: */
! 724:
! 725: /* Because isblank doesn't show up in any of the header files on any
! 726: * system i use, it's defined here. if some other locale has a richer
! 727: * definition of "blank", define HAS_ISBLANK and provide your own
! 728: * version.
! 729: * the parentheses here are an attempt to find a path through the maze
! 730: * of macro definition and/or function and/or version provided. thanks
! 731: * to nelson beebe for the suggestion; let's see if it works everywhere.
! 732: */
! 733:
! 734: /* #define HAS_ISBLANK */
! 735: #ifndef HAS_ISBLANK
! 736:
! 737: int (xisblank)(int c)
! 738: {
! 739: return c==' ' || c=='\t';
! 740: }
! 741:
! 742: #endif
! 743:
! 744: struct charclass {
! 745: const char *cc_name;
! 746: int cc_namelen;
! 747: int (*cc_func)(int);
! 748: } charclasses[] = {
! 749: { "alnum", 5, isalnum },
! 750: { "alpha", 5, isalpha },
! 751: { "blank", 5, isspace }, /* was isblank */
! 752: { "cntrl", 5, iscntrl },
! 753: { "digit", 5, isdigit },
! 754: { "graph", 5, isgraph },
! 755: { "lower", 5, islower },
! 756: { "print", 5, isprint },
! 757: { "punct", 5, ispunct },
! 758: { "space", 5, isspace },
! 759: { "upper", 5, isupper },
! 760: { "xdigit", 6, isxdigit },
! 761: { NULL, 0, NULL },
! 762: };
! 763:
! 764:
! 765: int relex(void) /* lexical analyzer for reparse */
! 766: {
! 767: int c, n;
! 768: int cflag;
! 769: static uschar *buf = 0;
! 770: static int bufsz = 100;
! 771: uschar *bp;
! 772: struct charclass *cc;
! 773: int i;
! 774:
! 775: switch (c = *prestr++) {
! 776: case '|': return OR;
! 777: case '*': return STAR;
! 778: case '+': return PLUS;
! 779: case '?': return QUEST;
! 780: case '.': return DOT;
! 781: case '\0': prestr--; return '\0';
! 782: case '^':
! 783: case '$':
! 784: case '(':
! 785: case ')':
! 786: return c;
! 787: case '\\':
! 788: rlxval = quoted((char **) &prestr);
! 789: return CHAR;
! 790: default:
! 791: rlxval = c;
! 792: return CHAR;
! 793: case '[':
! 794: if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
! 795: FATAL("out of space in reg expr %.10s..", lastre);
! 796: bp = buf;
! 797: if (*prestr == '^') {
! 798: cflag = 1;
! 799: prestr++;
! 800: }
! 801: else
! 802: cflag = 0;
! 803: n = 2 * strlen((const char *) prestr)+1;
! 804: if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
! 805: FATAL("out of space for reg expr %.10s...", lastre);
! 806: for (; ; ) {
! 807: if ((c = *prestr++) == '\\') {
! 808: *bp++ = '\\';
! 809: if ((c = *prestr++) == '\0')
! 810: FATAL("nonterminated character class %.20s...", lastre);
! 811: *bp++ = c;
! 812: /* } else if (c == '\n') { */
! 813: /* FATAL("newline in character class %.20s...", lastre); */
! 814: } else if (c == '[' && *prestr == ':') {
! 815: /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
! 816: for (cc = charclasses; cc->cc_name; cc++)
! 817: if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
! 818: break;
! 819: if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
! 820: prestr[2 + cc->cc_namelen] == ']') {
! 821: prestr += cc->cc_namelen + 3;
! 822: for (i = 0; i < NCHARS; i++) {
! 823: if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
! 824: FATAL("out of space for reg expr %.10s...", lastre);
! 825: if (cc->cc_func(i)) {
! 826: *bp++ = i;
! 827: n++;
! 828: }
! 829: }
! 830: } else
! 831: *bp++ = c;
! 832: } else if (c == '\0') {
! 833: FATAL("nonterminated character class %.20s", lastre);
! 834: } else if (bp == buf) { /* 1st char is special */
! 835: *bp++ = c;
! 836: } else if (c == ']') {
! 837: *bp++ = 0;
! 838: rlxstr = (uschar *) tostring((char *) buf);
! 839: if (cflag == 0)
! 840: return CCL;
! 841: else
! 842: return NCCL;
! 843: } else
! 844: *bp++ = c;
! 845: }
! 846: }
! 847: }
! 848:
! 849: int cgoto(fa *f, int s, int c)
! 850: {
! 851: int i, j, k;
! 852: int *p, *q;
! 853:
! 854: assert(c == HAT || c < NCHARS);
! 855: while (f->accept >= maxsetvec) { /* guessing here! */
! 856: maxsetvec *= 4;
! 857: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
! 858: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
! 859: if (setvec == 0 || tmpset == 0)
! 860: overflo("out of space in cgoto()");
! 861: }
! 862: for (i = 0; i <= f->accept; i++)
! 863: setvec[i] = 0;
! 864: setcnt = 0;
! 865: /* compute positions of gototab[s,c] into setvec */
! 866: p = f->posns[s];
! 867: for (i = 1; i <= *p; i++) {
! 868: if ((k = f->re[p[i]].ltype) != FINAL) {
! 869: if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
! 870: || (k == DOT && c != 0 && c != HAT)
! 871: || (k == ALL && c != 0)
! 872: || (k == EMPTYRE && c != 0)
! 873: || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
! 874: || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
! 875: q = f->re[p[i]].lfollow;
! 876: for (j = 1; j <= *q; j++) {
! 877: if (q[j] >= maxsetvec) {
! 878: maxsetvec *= 4;
! 879: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
! 880: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
! 881: if (setvec == 0 || tmpset == 0)
! 882: overflo("cgoto overflow");
! 883: }
! 884: if (setvec[q[j]] == 0) {
! 885: setcnt++;
! 886: setvec[q[j]] = 1;
! 887: }
! 888: }
! 889: }
! 890: }
! 891: }
! 892: /* determine if setvec is a previous state */
! 893: tmpset[0] = setcnt;
! 894: j = 1;
! 895: for (i = f->accept; i >= 0; i--)
! 896: if (setvec[i]) {
! 897: tmpset[j++] = i;
! 898: }
! 899: /* tmpset == previous state? */
! 900: for (i = 1; i <= f->curstat; i++) {
! 901: p = f->posns[i];
! 902: if ((k = tmpset[0]) != p[0])
! 903: goto different;
! 904: for (j = 1; j <= k; j++)
! 905: if (tmpset[j] != p[j])
! 906: goto different;
! 907: /* setvec is state i */
! 908: f->gototab[s][c] = i;
! 909: return i;
! 910: different:;
! 911: }
! 912:
! 913: /* add tmpset to current set of states */
! 914: if (f->curstat >= NSTATES-1) {
! 915: f->curstat = 2;
! 916: f->reset = 1;
! 917: for (i = 2; i < NSTATES; i++)
! 918: xfree(f->posns[i]);
! 919: } else
! 920: ++(f->curstat);
! 921: for (i = 0; i < NCHARS; i++)
! 922: f->gototab[f->curstat][i] = 0;
! 923: xfree(f->posns[f->curstat]);
! 924: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
! 925: overflo("out of space in cgoto");
! 926:
! 927: f->posns[f->curstat] = p;
! 928: f->gototab[s][c] = f->curstat;
! 929: for (i = 0; i <= setcnt; i++)
! 930: p[i] = tmpset[i];
! 931: if (setvec[f->accept])
! 932: f->out[f->curstat] = 1;
! 933: else
! 934: f->out[f->curstat] = 0;
! 935: return f->curstat;
! 936: }
! 937:
! 938:
! 939: void freefa(fa *f) /* free a finite automaton */
! 940: {
! 941: int i;
! 942:
! 943: if (f == NULL)
! 944: return;
! 945: for (i = 0; i <= f->curstat; i++)
! 946: xfree(f->posns[i]);
! 947: for (i = 0; i <= f->accept; i++) {
! 948: xfree(f->re[i].lfollow);
! 949: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
! 950: xfree((f->re[i].lval.np));
! 951: }
! 952: xfree(f->restr);
! 953: xfree(f);
! 954: }
CVSweb interface <joel.bertrand@systella.fr>