File:  [local] / rpl / rplawk / b.c
Revision 1.2: download - view: text, annotated - select for diffs - revision graph
Wed Jun 12 09:47:52 2013 UTC (10 years, 9 months ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_35, rpl-4_1_34, rpl-4_1_33, rpl-4_1_32, rpl-4_1_31, rpl-4_1_30, rpl-4_1_29, rpl-4_1_28, rpl-4_1_27, rpl-4_1_26, rpl-4_1_25, rpl-4_1_24, rpl-4_1_23, rpl-4_1_22, rpl-4_1_21, rpl-4_1_20, rpl-4_1_19, rpl-4_1_18, rpl-4_1_17, rpl-4_1_16, rpl-4_1_15, rpl-4_1_14, HEAD
Quelques patches pour rplawk et ncurses.

    1: /****************************************************************
    2: Copyright (C) Lucent Technologies 1997
    3: All Rights Reserved
    4: 
    5: Permission to use, copy, modify, and distribute this software and
    6: its documentation for any purpose and without fee is hereby
    7: granted, provided that the above copyright notice appear in all
    8: copies and that both that the copyright notice and this
    9: permission notice and warranty disclaimer appear in supporting
   10: documentation, and that the name Lucent Technologies or any of
   11: its entities not be used in advertising or publicity pertaining
   12: to distribution of the software without specific, written prior
   13: permission.
   14: 
   15: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
   16: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
   17: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
   18: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   19: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
   20: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
   21: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
   22: THIS SOFTWARE.
   23: ****************************************************************/
   24: 
   25: /* lasciate ogne speranza, voi ch'intrate. */
   26: 
   27: #define DEBUG
   28: 
   29: #include <ctype.h>
   30: #include <stdio.h>
   31: #include <string.h>
   32: #include <stdlib.h>
   33: #include "awk.h"
   34: #include "ytab.h"
   35: 
   36: #define HAT (NCHARS+2)  /* matches ^ in regular expr */
   37:                 /* NCHARS is 2**n */
   38: #define MAXLIN 22
   39: 
   40: #define type(v)     (v)->nobj   /* badly overloaded here */
   41: #define info(v)     (v)->ntype  /* badly overloaded here */
   42: #define left(v)     (v)->narg[0]
   43: #define right(v)    (v)->narg[1]
   44: #define parent(v)   (v)->nnext
   45: 
   46: #define LEAF    case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
   47: #define ELEAF   case EMPTYRE:       /* empty string in regexp */
   48: #define UNARY   case STAR: case PLUS: case QUEST:
   49: 
   50: /* encoding in tree Nodes:
   51:     leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
   52:         left is index, right contains value or pointer to value
   53:     unary (STAR, PLUS, QUEST): left is child, right is null
   54:     binary (CAT, OR): left and right are children
   55:     parent contains pointer to parent
   56: */
   57: 
   58: 
   59: int *setvec;
   60: int *tmpset;
   61: int maxsetvec = 0;
   62: 
   63: int rtok;       /* next token in current re */
   64: int rlxval;
   65: static uschar   *rlxstr;
   66: static uschar   *prestr;    /* current position in current re */
   67: static uschar   *lastre;    /* origin of last re */
   68: 
   69: static  int setcnt;
   70: static  int poscnt;
   71: 
   72: char    *patbeg;
   73: int patlen;
   74: 
   75: #define NFA 20  /* cache this many dynamic fa's */
   76: fa  *fatab[NFA];
   77: int nfatab  = 0;    /* entries in fatab */
   78: 
   79: fa *makedfa(const char *s, int anchor)  /* returns dfa for reg expr s */
   80: {
   81:     int i, use, nuse;
   82:     fa *pfa;
   83:     static int now = 1;
   84: 
   85:     if (setvec == 0) {  /* first time through any RE */
   86:         maxsetvec = MAXLIN;
   87:         setvec = (int *) malloc(maxsetvec * sizeof(int));
   88:         tmpset = (int *) malloc(maxsetvec * sizeof(int));
   89:         if (setvec == 0 || tmpset == 0)
   90:             overflo("out of space initializing makedfa");
   91:     }
   92: 
   93:     if (compile_time)   /* a constant for sure */
   94:         return mkdfa(s, anchor);
   95:     for (i = 0; i < nfatab; i++)    /* is it there already? */
   96:         if (fatab[i]->anchor == anchor
   97:           && strcmp((const char *) fatab[i]->restr, s) == 0) {
   98:             fatab[i]->use = now++;
   99:             return fatab[i];
  100:         }
  101:     pfa = mkdfa(s, anchor);
  102:     if (nfatab < NFA) { /* room for another */
  103:         fatab[nfatab] = pfa;
  104:         fatab[nfatab]->use = now++;
  105:         nfatab++;
  106:         return pfa;
  107:     }
  108:     use = fatab[0]->use;    /* replace least-recently used */
  109:     nuse = 0;
  110:     for (i = 1; i < nfatab; i++)
  111:         if (fatab[i]->use < use) {
  112:             use = fatab[i]->use;
  113:             nuse = i;
  114:         }
  115:     freefa(fatab[nuse]);
  116:     fatab[nuse] = pfa;
  117:     pfa->use = now++;
  118:     return pfa;
  119: }
  120: 
  121: fa *mkdfa(const char *s, int anchor)    /* does the real work of making a dfa */
  122:                 /* anchor = 1 for anchored matches, else 0 */
  123: {
  124:     Node *p, *p1;
  125:     fa *f;
  126: 
  127:     p = reparse(s);
  128:     p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
  129:         /* put ALL STAR in front of reg.  exp. */
  130:     p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
  131:         /* put FINAL after reg.  exp. */
  132: 
  133:     poscnt = 0;
  134:     penter(p1); /* enter parent pointers and leaf indices */
  135:     if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
  136:         overflo("out of space for fa");
  137:     f->accept = poscnt-1;   /* penter has computed number of positions in re */
  138:     cfoll(f, p1);   /* set up follow sets */
  139:     freetr(p1);
  140:     if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
  141:             overflo("out of space in makedfa");
  142:     if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
  143:         overflo("out of space in makedfa");
  144:     *f->posns[1] = 0;
  145:     f->initstat = makeinit(f, anchor);
  146:     f->anchor = anchor;
  147:     f->restr = (uschar *) tostring(s);
  148:     return f;
  149: }
  150: 
  151: int makeinit(fa *f, int anchor)
  152: {
  153:     int i, k;
  154: 
  155:     f->curstat = 2;
  156:     f->out[2] = 0;
  157:     f->reset = 0;
  158:     k = *(f->re[0].lfollow);
  159:     xfree(f->posns[2]);         
  160:     if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
  161:         overflo("out of space in makeinit");
  162:     for (i=0; i <= k; i++) {
  163:         (f->posns[2])[i] = (f->re[0].lfollow)[i];
  164:     }
  165:     if ((f->posns[2])[1] == f->accept)
  166:         f->out[2] = 1;
  167:     for (i=0; i < NCHARS; i++)
  168:         f->gototab[2][i] = 0;
  169:     f->curstat = cgoto(f, 2, HAT);
  170:     if (anchor) {
  171:         *f->posns[2] = k-1; /* leave out position 0 */
  172:         for (i=0; i < k; i++) {
  173:             (f->posns[0])[i] = (f->posns[2])[i];
  174:         }
  175: 
  176:         f->out[0] = f->out[2];
  177:         if (f->curstat != 2)
  178:             --(*f->posns[f->curstat]);
  179:     }
  180:     return f->curstat;
  181: }
  182: 
  183: void penter(Node *p)    /* set up parent pointers and leaf indices */
  184: {
  185:     switch (type(p)) {
  186:     ELEAF
  187:     LEAF
  188:         info(p) = poscnt;
  189:         poscnt++;
  190:         break;
  191:     UNARY
  192:         penter(left(p));
  193:         parent(left(p)) = p;
  194:         break;
  195:     case CAT:
  196:     case OR:
  197:         penter(left(p));
  198:         penter(right(p));
  199:         parent(left(p)) = p;
  200:         parent(right(p)) = p;
  201:         break;
  202:     default:    /* can't happen */
  203:         FATAL("can't happen: unknown type %d in penter", type(p));
  204:         break;
  205:     }
  206: }
  207: 
  208: void freetr(Node *p)    /* free parse tree */
  209: {
  210:     switch (type(p)) {
  211:     ELEAF
  212:     LEAF
  213:         xfree(p);
  214:         break;
  215:     UNARY
  216:         freetr(left(p));
  217:         xfree(p);
  218:         break;
  219:     case CAT:
  220:     case OR:
  221:         freetr(left(p));
  222:         freetr(right(p));
  223:         xfree(p);
  224:         break;
  225:     default:    /* can't happen */
  226:         FATAL("can't happen: unknown type %d in freetr", type(p));
  227:         break;
  228:     }
  229: }
  230: 
  231: /* in the parsing of regular expressions, metacharacters like . have */
  232: /* to be seen literally;  \056 is not a metacharacter. */
  233: 
  234: int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
  235: {           /* only pick up one 8-bit byte (2 chars) */
  236:     uschar *p;
  237:     int n = 0;
  238:     int i;
  239: 
  240:     for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
  241:         if (isdigit(*p))
  242:             n = 16 * n + *p - '0';
  243:         else if (*p >= 'a' && *p <= 'f')
  244:             n = 16 * n + *p - 'a' + 10;
  245:         else if (*p >= 'A' && *p <= 'F')
  246:             n = 16 * n + *p - 'A' + 10;
  247:     }
  248:     *pp = (uschar *) p;
  249:     return n;
  250: }
  251: 
  252: #define isoctdigit(c) ((c) >= '0' && (c) <= '7')    /* multiple use of arg */
  253: 
  254: int quoted(uschar **pp) /* pick up next thing after a \\ */
  255:             /* and increment *pp */
  256: {
  257:     uschar *p = *pp;
  258:     int c;
  259: 
  260:     if ((c = *p++) == 't')
  261:         c = '\t';
  262:     else if (c == 'n')
  263:         c = '\n';
  264:     else if (c == 'f')
  265:         c = '\f';
  266:     else if (c == 'r')
  267:         c = '\r';
  268:     else if (c == 'b')
  269:         c = '\b';
  270:     else if (c == '\\')
  271:         c = '\\';
  272:     else if (c == 'x') {    /* hexadecimal goo follows */
  273:         c = hexstr(&p); /* this adds a null if number is invalid */
  274:     } else if (isoctdigit(c)) { /* \d \dd \ddd */
  275:         int n = c - '0';
  276:         if (isoctdigit(*p)) {
  277:             n = 8 * n + *p++ - '0';
  278:             if (isoctdigit(*p))
  279:                 n = 8 * n + *p++ - '0';
  280:         }
  281:         c = n;
  282:     } /* else */
  283:         /* c = c; */
  284:     *pp = p;
  285:     return c;
  286: }
  287: 
  288: char *cclenter(const char *argp)    /* add a character class */
  289: {
  290:     int i, c, c2;
  291:     uschar *p = (uschar *) argp;
  292:     uschar *op, *bp;
  293:     static uschar *buf = 0;
  294:     static int bufsz = 100;
  295: 
  296:     op = p;
  297:     if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
  298:         FATAL("out of space for character class [%.10s...] 1", p);
  299:     bp = buf;
  300:     for (i = 0; (c = *p++) != 0; ) {
  301:         if (c == '\\') {
  302:             c = quoted(&p);
  303:         } else if (c == '-' && i > 0 && bp[-1] != 0) {
  304:             if (*p != 0) {
  305:                 c = bp[-1];
  306:                 c2 = *p++;
  307:                 if (c2 == '\\')
  308:                     c2 = quoted(&p);
  309:                 if (c > c2) {   /* empty; ignore */
  310:                     bp--;
  311:                     i--;
  312:                     continue;
  313:                 }
  314:                 while (c < c2) {
  315:                     if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
  316:                         FATAL("out of space for character class [%.10s...] 2", p);
  317:                     *bp++ = ++c;
  318:                     i++;
  319:                 }
  320:                 continue;
  321:             }
  322:         }
  323:         if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
  324:             FATAL("out of space for character class [%.10s...] 3", p);
  325:         *bp++ = c;
  326:         i++;
  327:     }
  328:     *bp = 0;
  329:     dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
  330:     xfree(op);
  331:     return (char *) tostring((char *) buf);
  332: }
  333: 
  334: void overflo(const char *s)
  335: {
  336:     FATAL("regular expression too big: %.30s...", s);
  337: }
  338: 
  339: void cfoll(fa *f, Node *v)  /* enter follow set of each leaf of vertex v into lfollow[leaf] */
  340: {
  341:     int i;
  342:     int *p;
  343: 
  344:     switch (type(v)) {
  345:     ELEAF
  346:     LEAF
  347:         f->re[info(v)].ltype = type(v);
  348:         f->re[info(v)].lval.np = right(v);
  349:         while (f->accept >= maxsetvec) {    /* guessing here! */
  350:             maxsetvec *= 4;
  351:             setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  352:             tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
  353:             if (setvec == 0 || tmpset == 0)
  354:                 overflo("out of space in cfoll()");
  355:         }
  356:         for (i = 0; i <= f->accept; i++)
  357:             setvec[i] = 0;
  358:         setcnt = 0;
  359:         follow(v);  /* computes setvec and setcnt */
  360:         if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
  361:             overflo("out of space building follow set");
  362:         f->re[info(v)].lfollow = p;
  363:         *p = setcnt;
  364:         for (i = f->accept; i >= 0; i--)
  365:             if (setvec[i] == 1)
  366:                 *++p = i;
  367:         break;
  368:     UNARY
  369:         cfoll(f,left(v));
  370:         break;
  371:     case CAT:
  372:     case OR:
  373:         cfoll(f,left(v));
  374:         cfoll(f,right(v));
  375:         break;
  376:     default:    /* can't happen */
  377:         FATAL("can't happen: unknown type %d in cfoll", type(v));
  378:     }
  379: }
  380: 
  381: int first(Node *p)  /* collects initially active leaves of p into setvec */
  382:             /* returns 0 if p matches empty string */
  383: {
  384:     int b, lp;
  385: 
  386:     switch (type(p)) {
  387:     ELEAF
  388:     LEAF
  389:         lp = info(p);   /* look for high-water mark of subscripts */
  390:         while (setcnt >= maxsetvec || lp >= maxsetvec) {    /* guessing here! */
  391:             maxsetvec *= 4;
  392:             setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  393:             tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
  394:             if (setvec == 0 || tmpset == 0)
  395:                 overflo("out of space in first()");
  396:         }
  397:         if (type(p) == EMPTYRE) {
  398:             setvec[lp] = 0;
  399:             return(0);
  400:         }
  401:         if (setvec[lp] != 1) {
  402:             setvec[lp] = 1;
  403:             setcnt++;
  404:         }
  405:         if (type(p) == CCL && (*(char *) right(p)) == '\0')
  406:             return(0);      /* empty CCL */
  407:         else return(1);
  408:     case PLUS:
  409:         if (first(left(p)) == 0) return(0);
  410:         return(1);
  411:     case STAR:
  412:     case QUEST:
  413:         first(left(p));
  414:         return(0);
  415:     case CAT:
  416:         if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
  417:         return(1);
  418:     case OR:
  419:         b = first(right(p));
  420:         if (first(left(p)) == 0 || b == 0) return(0);
  421:         return(1);
  422:     }
  423:     FATAL("can't happen: unknown type %d in first", type(p));   /* can't happen */
  424:     return(-1);
  425: }
  426: 
  427: void follow(Node *v)    /* collects leaves that can follow v into setvec */
  428: {
  429:     Node *p;
  430: 
  431:     if (type(v) == FINAL)
  432:         return;
  433:     p = parent(v);
  434:     switch (type(p)) {
  435:     case STAR:
  436:     case PLUS:
  437:         first(v);
  438:         follow(p);
  439:         return;
  440: 
  441:     case OR:
  442:     case QUEST:
  443:         follow(p);
  444:         return;
  445: 
  446:     case CAT:
  447:         if (v == left(p)) { /* v is left child of p */
  448:             if (first(right(p)) == 0) {
  449:                 follow(p);
  450:                 return;
  451:             }
  452:         } else      /* v is right child */
  453:             follow(p);
  454:         return;
  455:     }
  456: }
  457: 
  458: int member(int c, const char *sarg) /* is c in s? */
  459: {
  460:     uschar *s = (uschar *) sarg;
  461: 
  462:     while (*s)
  463:         if (c == *s++)
  464:             return(1);
  465:     return(0);
  466: }
  467: 
  468: int match(fa *f, const char *p0)    /* shortest match ? */
  469: {
  470:     int s, ns;
  471:     uschar *p = (uschar *) p0;
  472: 
  473:     s = f->reset ? makeinit(f,0) : f->initstat;
  474:     if (f->out[s])
  475:         return(1);
  476:     do {
  477:         /* assert(*p < NCHARS); */
  478:         if ((ns = f->gototab[s][*p]) != 0)
  479:             s = ns;
  480:         else
  481:             s = cgoto(f, s, *p);
  482:         if (f->out[s])
  483:             return(1);
  484:     } while (*p++ != 0);
  485:     return(0);
  486: }
  487: 
  488: int pmatch(fa *f, const char *p0)   /* longest match, for sub */
  489: {
  490:     int s, ns;
  491:     uschar *p = (uschar *) p0;
  492:     uschar *q;
  493:     int i, k;
  494: 
  495:     /* s = f->reset ? makeinit(f,1) : f->initstat; */
  496:     if (f->reset) {
  497:         f->initstat = s = makeinit(f,1);
  498:     } else {
  499:         s = f->initstat;
  500:     }
  501:     patbeg = (char *) p;
  502:     patlen = -1;
  503:     do {
  504:         q = p;
  505:         do {
  506:             if (f->out[s])      /* final state */
  507:                 patlen = q-p;
  508:             /* assert(*q < NCHARS); */
  509:             if ((ns = f->gototab[s][*q]) != 0)
  510:                 s = ns;
  511:             else
  512:                 s = cgoto(f, s, *q);
  513:             if (s == 1) {   /* no transition */
  514:                 if (patlen >= 0) {
  515:                     patbeg = (char *) p;
  516:                     return(1);
  517:                 }
  518:                 else
  519:                     goto nextin;    /* no match */
  520:             }
  521:         } while (*q++ != 0);
  522:         if (f->out[s])
  523:             patlen = q-p-1; /* don't count $ */
  524:         if (patlen >= 0) {
  525:             patbeg = (char *) p;
  526:             return(1);
  527:         }
  528:     nextin:
  529:         s = 2;
  530:         if (f->reset) {
  531:             for (i = 2; i <= f->curstat; i++)
  532:                 xfree(f->posns[i]);
  533:             k = *f->posns[0];           
  534:             if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
  535:                 overflo("out of space in pmatch");
  536:             for (i = 0; i <= k; i++)
  537:                 (f->posns[2])[i] = (f->posns[0])[i];
  538:             f->initstat = f->curstat = 2;
  539:             f->out[2] = f->out[0];
  540:             for (i = 0; i < NCHARS; i++)
  541:                 f->gototab[2][i] = 0;
  542:         }
  543:     } while (*p++ != 0);
  544:     return (0);
  545: }
  546: 
  547: int nematch(fa *f, const char *p0)  /* non-empty match, for sub */
  548: {
  549:     int s, ns;
  550:     uschar *p = (uschar *) p0;
  551:     uschar *q;
  552:     int i, k;
  553: 
  554:     /* s = f->reset ? makeinit(f,1) : f->initstat; */
  555:     if (f->reset) {
  556:         f->initstat = s = makeinit(f,1);
  557:     } else {
  558:         s = f->initstat;
  559:     }
  560:     patlen = -1;
  561:     while (*p) {
  562:         q = p;
  563:         do {
  564:             if (f->out[s])      /* final state */
  565:                 patlen = q-p;
  566:             /* assert(*q < NCHARS); */
  567:             if ((ns = f->gototab[s][*q]) != 0)
  568:                 s = ns;
  569:             else
  570:                 s = cgoto(f, s, *q);
  571:             if (s == 1) {   /* no transition */
  572:                 if (patlen > 0) {
  573:                     patbeg = (char *) p;
  574:                     return(1);
  575:                 } else
  576:                     goto nnextin;   /* no nonempty match */
  577:             }
  578:         } while (*q++ != 0);
  579:         if (f->out[s])
  580:             patlen = q-p-1; /* don't count $ */
  581:         if (patlen > 0 ) {
  582:             patbeg = (char *) p;
  583:             return(1);
  584:         }
  585:     nnextin:
  586:         s = 2;
  587:         if (f->reset) {
  588:             for (i = 2; i <= f->curstat; i++)
  589:                 xfree(f->posns[i]);
  590:             k = *f->posns[0];           
  591:             if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
  592:                 overflo("out of state space");
  593:             for (i = 0; i <= k; i++)
  594:                 (f->posns[2])[i] = (f->posns[0])[i];
  595:             f->initstat = f->curstat = 2;
  596:             f->out[2] = f->out[0];
  597:             for (i = 0; i < NCHARS; i++)
  598:                 f->gototab[2][i] = 0;
  599:         }
  600:         p++;
  601:     }
  602:     return (0);
  603: }
  604: 
  605: Node *reparse(const char *p)    /* parses regular expression pointed to by p */
  606: {           /* uses relex() to scan regular expression */
  607:     Node *np;
  608: 
  609:     dprintf( ("reparse <%s>\n", p) );
  610:     lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
  611:     rtok = relex();
  612:     /* GNU compatibility: an empty regexp matches anything */
  613:     if (rtok == '\0') {
  614:         /* FATAL("empty regular expression"); previous */
  615:         return(op2(EMPTYRE, NIL, NIL));
  616:     }
  617:     np = regexp();
  618:     if (rtok != '\0')
  619:         FATAL("syntax error in regular expression %s at %s", lastre, prestr);
  620:     return(np);
  621: }
  622: 
  623: Node *regexp(void)  /* top-level parse of reg expr */
  624: {
  625:     return (alt(concat(primary())));
  626: }
  627: 
  628: Node *primary(void)
  629: {
  630:     Node *np;
  631: 
  632:     switch (rtok) {
  633:     case CHAR:
  634:         np = op2(CHAR, NIL, itonp(rlxval));
  635:         rtok = relex();
  636:         return (unary(np));
  637:     case ALL:
  638:         rtok = relex();
  639:         return (unary(op2(ALL, NIL, NIL)));
  640:     case EMPTYRE:
  641:         rtok = relex();
  642:         return (unary(op2(ALL, NIL, NIL)));
  643:     case DOT:
  644:         rtok = relex();
  645:         return (unary(op2(DOT, NIL, NIL)));
  646:     case CCL:
  647:         np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
  648:         rtok = relex();
  649:         return (unary(np));
  650:     case NCCL:
  651:         np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
  652:         rtok = relex();
  653:         return (unary(np));
  654:     case '^':
  655:         rtok = relex();
  656:         return (unary(op2(CHAR, NIL, itonp(HAT))));
  657:     case '$':
  658:         rtok = relex();
  659:         return (unary(op2(CHAR, NIL, NIL)));
  660:     case '(':
  661:         rtok = relex();
  662:         if (rtok == ')') {  /* special pleading for () */
  663:             rtok = relex();
  664:             return unary(op2(CCL, NIL, (Node *) tostring("")));
  665:         }
  666:         np = regexp();
  667:         if (rtok == ')') {
  668:             rtok = relex();
  669:             return (unary(np));
  670:         }
  671:         else
  672:             FATAL("syntax error in regular expression %s at %s", lastre, prestr);
  673:     default:
  674:         FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
  675:     }
  676:     return 0;   /*NOTREACHED*/
  677: }
  678: 
  679: Node *concat(Node *np)
  680: {
  681:     switch (rtok) {
  682:     case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
  683:         return (concat(op2(CAT, np, primary())));
  684:     }
  685:     return (np);
  686: }
  687: 
  688: Node *alt(Node *np)
  689: {
  690:     if (rtok == OR) {
  691:         rtok = relex();
  692:         return (alt(op2(OR, np, concat(primary()))));
  693:     }
  694:     return (np);
  695: }
  696: 
  697: Node *unary(Node *np)
  698: {
  699:     switch (rtok) {
  700:     case STAR:
  701:         rtok = relex();
  702:         return (unary(op2(STAR, np, NIL)));
  703:     case PLUS:
  704:         rtok = relex();
  705:         return (unary(op2(PLUS, np, NIL)));
  706:     case QUEST:
  707:         rtok = relex();
  708:         return (unary(op2(QUEST, np, NIL)));
  709:     default:
  710:         return (np);
  711:     }
  712: }
  713: 
  714: /*
  715:  * Character class definitions conformant to the POSIX locale as
  716:  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
  717:  * and operating character sets are both ASCII (ISO646) or supersets
  718:  * thereof.
  719:  *
  720:  * Note that to avoid overflowing the temporary buffer used in
  721:  * relex(), the expanded character class (prior to range expansion)
  722:  * must be less than twice the size of their full name.
  723:  */
  724: 
  725: /* Because isblank doesn't show up in any of the header files on any
  726:  * system i use, it's defined here.  if some other locale has a richer
  727:  * definition of "blank", define HAS_ISBLANK and provide your own
  728:  * version.
  729:  * the parentheses here are an attempt to find a path through the maze
  730:  * of macro definition and/or function and/or version provided.  thanks
  731:  * to nelson beebe for the suggestion; let's see if it works everywhere.
  732:  */
  733: 
  734: /* #define HAS_ISBLANK */
  735: #ifndef HAS_ISBLANK
  736: 
  737: int (xisblank)(int c)
  738: {
  739:     return c==' ' || c=='\t';
  740: }
  741: 
  742: #endif
  743: 
  744: struct charclass {
  745:     const char *cc_name;
  746:     int cc_namelen;
  747:     int (*cc_func)(int);
  748: } charclasses[] = {
  749:     { "alnum",  5,  isalnum },
  750:     { "alpha",  5,  isalpha },
  751: #ifndef HAS_ISBLANK
  752:     { "blank",  5,  isspace }, /* was isblank */
  753: #else
  754:     { "blank",  5,  isblank },
  755: #endif
  756:     { "cntrl",  5,  iscntrl },
  757:     { "digit",  5,  isdigit },
  758:     { "graph",  5,  isgraph },
  759:     { "lower",  5,  islower },
  760:     { "print",  5,  isprint },
  761:     { "punct",  5,  ispunct },
  762:     { "space",  5,  isspace },
  763:     { "upper",  5,  isupper },
  764:     { "xdigit", 6,  isxdigit },
  765:     { NULL,     0,  NULL },
  766: };
  767: 
  768: 
  769: int relex(void)     /* lexical analyzer for reparse */
  770: {
  771:     int c, n;
  772:     int cflag;
  773:     static uschar *buf = 0;
  774:     static int bufsz = 100;
  775:     uschar *bp;
  776:     struct charclass *cc;
  777:     int i;
  778: 
  779:     switch (c = *prestr++) {
  780:     case '|': return OR;
  781:     case '*': return STAR;
  782:     case '+': return PLUS;
  783:     case '?': return QUEST;
  784:     case '.': return DOT;
  785:     case '\0': prestr--; return '\0';
  786:     case '^':
  787:     case '$':
  788:     case '(':
  789:     case ')':
  790:         return c;
  791:     case '\\':
  792:         rlxval = quoted(&prestr);
  793:         return CHAR;
  794:     default:
  795:         rlxval = c;
  796:         return CHAR;
  797:     case '[': 
  798:         if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
  799:             FATAL("out of space in reg expr %.10s..", lastre);
  800:         bp = buf;
  801:         if (*prestr == '^') {
  802:             cflag = 1;
  803:             prestr++;
  804:         }
  805:         else
  806:             cflag = 0;
  807:         n = 2 * strlen((const char *) prestr)+1;
  808:         if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
  809:             FATAL("out of space for reg expr %.10s...", lastre);
  810:         for (; ; ) {
  811:             if ((c = *prestr++) == '\\') {
  812:                 *bp++ = '\\';
  813:                 if ((c = *prestr++) == '\0')
  814:                     FATAL("nonterminated character class %.20s...", lastre);
  815:                 *bp++ = c;
  816:             /* } else if (c == '\n') { */
  817:             /*  FATAL("newline in character class %.20s...", lastre); */
  818:             } else if (c == '[' && *prestr == ':') {
  819:                 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
  820:                 for (cc = charclasses; cc->cc_name; cc++)
  821:                     if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
  822:                         break;
  823:                 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
  824:                     prestr[2 + cc->cc_namelen] == ']') {
  825:                     prestr += cc->cc_namelen + 3;
  826:                     for (i = 0; i < NCHARS; i++) {
  827:                         if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
  828:                             FATAL("out of space for reg expr %.10s...", lastre);
  829:                         if (cc->cc_func(i)) {
  830:                             *bp++ = i;
  831:                             n++;
  832:                         }
  833:                     }
  834:                 } else
  835:                     *bp++ = c;
  836:             } else if (c == '\0') {
  837:                 FATAL("nonterminated character class %.20s", lastre);
  838:             } else if (bp == buf) { /* 1st char is special */
  839:                 *bp++ = c;
  840:             } else if (c == ']') {
  841:                 *bp++ = 0;
  842:                 rlxstr = (uschar *) tostring((char *) buf);
  843:                 if (cflag == 0)
  844:                     return CCL;
  845:                 else
  846:                     return NCCL;
  847:             } else
  848:                 *bp++ = c;
  849:         }
  850:     }
  851: }
  852: 
  853: int cgoto(fa *f, int s, int c)
  854: {
  855:     int i, j, k;
  856:     int *p, *q;
  857: 
  858:     assert(c == HAT || c < NCHARS);
  859:     while (f->accept >= maxsetvec) {    /* guessing here! */
  860:         maxsetvec *= 4;
  861:         setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  862:         tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
  863:         if (setvec == 0 || tmpset == 0)
  864:             overflo("out of space in cgoto()");
  865:     }
  866:     for (i = 0; i <= f->accept; i++)
  867:         setvec[i] = 0;
  868:     setcnt = 0;
  869:     /* compute positions of gototab[s,c] into setvec */
  870:     p = f->posns[s];
  871:     for (i = 1; i <= *p; i++) {
  872:         if ((k = f->re[p[i]].ltype) != FINAL) {
  873:             if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
  874:              || (k == DOT && c != 0 && c != HAT)
  875:              || (k == ALL && c != 0)
  876:              || (k == EMPTYRE && c != 0)
  877:              || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
  878:              || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
  879:                 q = f->re[p[i]].lfollow;
  880:                 for (j = 1; j <= *q; j++) {
  881:                     if (q[j] >= maxsetvec) {
  882:                         maxsetvec *= 4;
  883:                         setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
  884:                         tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
  885:                         if (setvec == 0 || tmpset == 0)
  886:                             overflo("cgoto overflow");
  887:                     }
  888:                     if (setvec[q[j]] == 0) {
  889:                         setcnt++;
  890:                         setvec[q[j]] = 1;
  891:                     }
  892:                 }
  893:             }
  894:         }
  895:     }
  896:     /* determine if setvec is a previous state */
  897:     tmpset[0] = setcnt;
  898:     j = 1;
  899:     for (i = f->accept; i >= 0; i--)
  900:         if (setvec[i]) {
  901:             tmpset[j++] = i;
  902:         }
  903:     /* tmpset == previous state? */
  904:     for (i = 1; i <= f->curstat; i++) {
  905:         p = f->posns[i];
  906:         if ((k = tmpset[0]) != p[0])
  907:             goto different;
  908:         for (j = 1; j <= k; j++)
  909:             if (tmpset[j] != p[j])
  910:                 goto different;
  911:         /* setvec is state i */
  912:         f->gototab[s][c] = i;
  913:         return i;
  914:       different:;
  915:     }
  916: 
  917:     /* add tmpset to current set of states */
  918:     if (f->curstat >= NSTATES-1) {
  919:         f->curstat = 2;
  920:         f->reset = 1;
  921:         for (i = 2; i < NSTATES; i++)
  922:             xfree(f->posns[i]);
  923:     } else
  924:         ++(f->curstat);
  925:     for (i = 0; i < NCHARS; i++)
  926:         f->gototab[f->curstat][i] = 0;
  927:     xfree(f->posns[f->curstat]);
  928:     if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
  929:         overflo("out of space in cgoto");
  930: 
  931:     f->posns[f->curstat] = p;
  932:     f->gototab[s][c] = f->curstat;
  933:     for (i = 0; i <= setcnt; i++)
  934:         p[i] = tmpset[i];
  935:     if (setvec[f->accept])
  936:         f->out[f->curstat] = 1;
  937:     else
  938:         f->out[f->curstat] = 0;
  939:     return f->curstat;
  940: }
  941: 
  942: 
  943: void freefa(fa *f)  /* free a finite automaton */
  944: {
  945:     int i;
  946: 
  947:     if (f == NULL)
  948:         return;
  949:     for (i = 0; i <= f->curstat; i++)
  950:         xfree(f->posns[i]);
  951:     for (i = 0; i <= f->accept; i++) {
  952:         xfree(f->re[i].lfollow);
  953:         if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
  954:             xfree((f->re[i].lval.np));
  955:     }
  956:     xfree(f->restr);
  957:     xfree(f);
  958: }

CVSweb interface <joel.bertrand@systella.fr>