File:
[local] /
rpl /
rplawk /
b.c
Revision
1.2:
download - view:
text,
annotated -
select for diffs -
revision graph
Wed Jun 12 09:47:52 2013 UTC (11 years, 10 months ago) by
bertrand
Branches:
MAIN
CVS tags:
rpl-4_1_35,
rpl-4_1_34,
rpl-4_1_33,
rpl-4_1_32,
rpl-4_1_31,
rpl-4_1_30,
rpl-4_1_29,
rpl-4_1_28,
rpl-4_1_27,
rpl-4_1_26,
rpl-4_1_25,
rpl-4_1_24,
rpl-4_1_23,
rpl-4_1_22,
rpl-4_1_21,
rpl-4_1_20,
rpl-4_1_19,
rpl-4_1_18,
rpl-4_1_17,
rpl-4_1_16,
rpl-4_1_15,
rpl-4_1_14,
HEAD
Quelques patches pour rplawk et ncurses.
1: /****************************************************************
2: Copyright (C) Lucent Technologies 1997
3: All Rights Reserved
4:
5: Permission to use, copy, modify, and distribute this software and
6: its documentation for any purpose and without fee is hereby
7: granted, provided that the above copyright notice appear in all
8: copies and that both that the copyright notice and this
9: permission notice and warranty disclaimer appear in supporting
10: documentation, and that the name Lucent Technologies or any of
11: its entities not be used in advertising or publicity pertaining
12: to distribution of the software without specific, written prior
13: permission.
14:
15: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22: THIS SOFTWARE.
23: ****************************************************************/
24:
25: /* lasciate ogne speranza, voi ch'intrate. */
26:
27: #define DEBUG
28:
29: #include <ctype.h>
30: #include <stdio.h>
31: #include <string.h>
32: #include <stdlib.h>
33: #include "awk.h"
34: #include "ytab.h"
35:
36: #define HAT (NCHARS+2) /* matches ^ in regular expr */
37: /* NCHARS is 2**n */
38: #define MAXLIN 22
39:
40: #define type(v) (v)->nobj /* badly overloaded here */
41: #define info(v) (v)->ntype /* badly overloaded here */
42: #define left(v) (v)->narg[0]
43: #define right(v) (v)->narg[1]
44: #define parent(v) (v)->nnext
45:
46: #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47: #define ELEAF case EMPTYRE: /* empty string in regexp */
48: #define UNARY case STAR: case PLUS: case QUEST:
49:
50: /* encoding in tree Nodes:
51: leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
52: left is index, right contains value or pointer to value
53: unary (STAR, PLUS, QUEST): left is child, right is null
54: binary (CAT, OR): left and right are children
55: parent contains pointer to parent
56: */
57:
58:
59: int *setvec;
60: int *tmpset;
61: int maxsetvec = 0;
62:
63: int rtok; /* next token in current re */
64: int rlxval;
65: static uschar *rlxstr;
66: static uschar *prestr; /* current position in current re */
67: static uschar *lastre; /* origin of last re */
68:
69: static int setcnt;
70: static int poscnt;
71:
72: char *patbeg;
73: int patlen;
74:
75: #define NFA 20 /* cache this many dynamic fa's */
76: fa *fatab[NFA];
77: int nfatab = 0; /* entries in fatab */
78:
79: fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
80: {
81: int i, use, nuse;
82: fa *pfa;
83: static int now = 1;
84:
85: if (setvec == 0) { /* first time through any RE */
86: maxsetvec = MAXLIN;
87: setvec = (int *) malloc(maxsetvec * sizeof(int));
88: tmpset = (int *) malloc(maxsetvec * sizeof(int));
89: if (setvec == 0 || tmpset == 0)
90: overflo("out of space initializing makedfa");
91: }
92:
93: if (compile_time) /* a constant for sure */
94: return mkdfa(s, anchor);
95: for (i = 0; i < nfatab; i++) /* is it there already? */
96: if (fatab[i]->anchor == anchor
97: && strcmp((const char *) fatab[i]->restr, s) == 0) {
98: fatab[i]->use = now++;
99: return fatab[i];
100: }
101: pfa = mkdfa(s, anchor);
102: if (nfatab < NFA) { /* room for another */
103: fatab[nfatab] = pfa;
104: fatab[nfatab]->use = now++;
105: nfatab++;
106: return pfa;
107: }
108: use = fatab[0]->use; /* replace least-recently used */
109: nuse = 0;
110: for (i = 1; i < nfatab; i++)
111: if (fatab[i]->use < use) {
112: use = fatab[i]->use;
113: nuse = i;
114: }
115: freefa(fatab[nuse]);
116: fatab[nuse] = pfa;
117: pfa->use = now++;
118: return pfa;
119: }
120:
121: fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
122: /* anchor = 1 for anchored matches, else 0 */
123: {
124: Node *p, *p1;
125: fa *f;
126:
127: p = reparse(s);
128: p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
129: /* put ALL STAR in front of reg. exp. */
130: p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
131: /* put FINAL after reg. exp. */
132:
133: poscnt = 0;
134: penter(p1); /* enter parent pointers and leaf indices */
135: if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
136: overflo("out of space for fa");
137: f->accept = poscnt-1; /* penter has computed number of positions in re */
138: cfoll(f, p1); /* set up follow sets */
139: freetr(p1);
140: if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
141: overflo("out of space in makedfa");
142: if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
143: overflo("out of space in makedfa");
144: *f->posns[1] = 0;
145: f->initstat = makeinit(f, anchor);
146: f->anchor = anchor;
147: f->restr = (uschar *) tostring(s);
148: return f;
149: }
150:
151: int makeinit(fa *f, int anchor)
152: {
153: int i, k;
154:
155: f->curstat = 2;
156: f->out[2] = 0;
157: f->reset = 0;
158: k = *(f->re[0].lfollow);
159: xfree(f->posns[2]);
160: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
161: overflo("out of space in makeinit");
162: for (i=0; i <= k; i++) {
163: (f->posns[2])[i] = (f->re[0].lfollow)[i];
164: }
165: if ((f->posns[2])[1] == f->accept)
166: f->out[2] = 1;
167: for (i=0; i < NCHARS; i++)
168: f->gototab[2][i] = 0;
169: f->curstat = cgoto(f, 2, HAT);
170: if (anchor) {
171: *f->posns[2] = k-1; /* leave out position 0 */
172: for (i=0; i < k; i++) {
173: (f->posns[0])[i] = (f->posns[2])[i];
174: }
175:
176: f->out[0] = f->out[2];
177: if (f->curstat != 2)
178: --(*f->posns[f->curstat]);
179: }
180: return f->curstat;
181: }
182:
183: void penter(Node *p) /* set up parent pointers and leaf indices */
184: {
185: switch (type(p)) {
186: ELEAF
187: LEAF
188: info(p) = poscnt;
189: poscnt++;
190: break;
191: UNARY
192: penter(left(p));
193: parent(left(p)) = p;
194: break;
195: case CAT:
196: case OR:
197: penter(left(p));
198: penter(right(p));
199: parent(left(p)) = p;
200: parent(right(p)) = p;
201: break;
202: default: /* can't happen */
203: FATAL("can't happen: unknown type %d in penter", type(p));
204: break;
205: }
206: }
207:
208: void freetr(Node *p) /* free parse tree */
209: {
210: switch (type(p)) {
211: ELEAF
212: LEAF
213: xfree(p);
214: break;
215: UNARY
216: freetr(left(p));
217: xfree(p);
218: break;
219: case CAT:
220: case OR:
221: freetr(left(p));
222: freetr(right(p));
223: xfree(p);
224: break;
225: default: /* can't happen */
226: FATAL("can't happen: unknown type %d in freetr", type(p));
227: break;
228: }
229: }
230:
231: /* in the parsing of regular expressions, metacharacters like . have */
232: /* to be seen literally; \056 is not a metacharacter. */
233:
234: int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
235: { /* only pick up one 8-bit byte (2 chars) */
236: uschar *p;
237: int n = 0;
238: int i;
239:
240: for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
241: if (isdigit(*p))
242: n = 16 * n + *p - '0';
243: else if (*p >= 'a' && *p <= 'f')
244: n = 16 * n + *p - 'a' + 10;
245: else if (*p >= 'A' && *p <= 'F')
246: n = 16 * n + *p - 'A' + 10;
247: }
248: *pp = (uschar *) p;
249: return n;
250: }
251:
252: #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
253:
254: int quoted(uschar **pp) /* pick up next thing after a \\ */
255: /* and increment *pp */
256: {
257: uschar *p = *pp;
258: int c;
259:
260: if ((c = *p++) == 't')
261: c = '\t';
262: else if (c == 'n')
263: c = '\n';
264: else if (c == 'f')
265: c = '\f';
266: else if (c == 'r')
267: c = '\r';
268: else if (c == 'b')
269: c = '\b';
270: else if (c == '\\')
271: c = '\\';
272: else if (c == 'x') { /* hexadecimal goo follows */
273: c = hexstr(&p); /* this adds a null if number is invalid */
274: } else if (isoctdigit(c)) { /* \d \dd \ddd */
275: int n = c - '0';
276: if (isoctdigit(*p)) {
277: n = 8 * n + *p++ - '0';
278: if (isoctdigit(*p))
279: n = 8 * n + *p++ - '0';
280: }
281: c = n;
282: } /* else */
283: /* c = c; */
284: *pp = p;
285: return c;
286: }
287:
288: char *cclenter(const char *argp) /* add a character class */
289: {
290: int i, c, c2;
291: uschar *p = (uschar *) argp;
292: uschar *op, *bp;
293: static uschar *buf = 0;
294: static int bufsz = 100;
295:
296: op = p;
297: if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
298: FATAL("out of space for character class [%.10s...] 1", p);
299: bp = buf;
300: for (i = 0; (c = *p++) != 0; ) {
301: if (c == '\\') {
302: c = quoted(&p);
303: } else if (c == '-' && i > 0 && bp[-1] != 0) {
304: if (*p != 0) {
305: c = bp[-1];
306: c2 = *p++;
307: if (c2 == '\\')
308: c2 = quoted(&p);
309: if (c > c2) { /* empty; ignore */
310: bp--;
311: i--;
312: continue;
313: }
314: while (c < c2) {
315: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
316: FATAL("out of space for character class [%.10s...] 2", p);
317: *bp++ = ++c;
318: i++;
319: }
320: continue;
321: }
322: }
323: if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
324: FATAL("out of space for character class [%.10s...] 3", p);
325: *bp++ = c;
326: i++;
327: }
328: *bp = 0;
329: dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
330: xfree(op);
331: return (char *) tostring((char *) buf);
332: }
333:
334: void overflo(const char *s)
335: {
336: FATAL("regular expression too big: %.30s...", s);
337: }
338:
339: void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
340: {
341: int i;
342: int *p;
343:
344: switch (type(v)) {
345: ELEAF
346: LEAF
347: f->re[info(v)].ltype = type(v);
348: f->re[info(v)].lval.np = right(v);
349: while (f->accept >= maxsetvec) { /* guessing here! */
350: maxsetvec *= 4;
351: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
352: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
353: if (setvec == 0 || tmpset == 0)
354: overflo("out of space in cfoll()");
355: }
356: for (i = 0; i <= f->accept; i++)
357: setvec[i] = 0;
358: setcnt = 0;
359: follow(v); /* computes setvec and setcnt */
360: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
361: overflo("out of space building follow set");
362: f->re[info(v)].lfollow = p;
363: *p = setcnt;
364: for (i = f->accept; i >= 0; i--)
365: if (setvec[i] == 1)
366: *++p = i;
367: break;
368: UNARY
369: cfoll(f,left(v));
370: break;
371: case CAT:
372: case OR:
373: cfoll(f,left(v));
374: cfoll(f,right(v));
375: break;
376: default: /* can't happen */
377: FATAL("can't happen: unknown type %d in cfoll", type(v));
378: }
379: }
380:
381: int first(Node *p) /* collects initially active leaves of p into setvec */
382: /* returns 0 if p matches empty string */
383: {
384: int b, lp;
385:
386: switch (type(p)) {
387: ELEAF
388: LEAF
389: lp = info(p); /* look for high-water mark of subscripts */
390: while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
391: maxsetvec *= 4;
392: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
393: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
394: if (setvec == 0 || tmpset == 0)
395: overflo("out of space in first()");
396: }
397: if (type(p) == EMPTYRE) {
398: setvec[lp] = 0;
399: return(0);
400: }
401: if (setvec[lp] != 1) {
402: setvec[lp] = 1;
403: setcnt++;
404: }
405: if (type(p) == CCL && (*(char *) right(p)) == '\0')
406: return(0); /* empty CCL */
407: else return(1);
408: case PLUS:
409: if (first(left(p)) == 0) return(0);
410: return(1);
411: case STAR:
412: case QUEST:
413: first(left(p));
414: return(0);
415: case CAT:
416: if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
417: return(1);
418: case OR:
419: b = first(right(p));
420: if (first(left(p)) == 0 || b == 0) return(0);
421: return(1);
422: }
423: FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
424: return(-1);
425: }
426:
427: void follow(Node *v) /* collects leaves that can follow v into setvec */
428: {
429: Node *p;
430:
431: if (type(v) == FINAL)
432: return;
433: p = parent(v);
434: switch (type(p)) {
435: case STAR:
436: case PLUS:
437: first(v);
438: follow(p);
439: return;
440:
441: case OR:
442: case QUEST:
443: follow(p);
444: return;
445:
446: case CAT:
447: if (v == left(p)) { /* v is left child of p */
448: if (first(right(p)) == 0) {
449: follow(p);
450: return;
451: }
452: } else /* v is right child */
453: follow(p);
454: return;
455: }
456: }
457:
458: int member(int c, const char *sarg) /* is c in s? */
459: {
460: uschar *s = (uschar *) sarg;
461:
462: while (*s)
463: if (c == *s++)
464: return(1);
465: return(0);
466: }
467:
468: int match(fa *f, const char *p0) /* shortest match ? */
469: {
470: int s, ns;
471: uschar *p = (uschar *) p0;
472:
473: s = f->reset ? makeinit(f,0) : f->initstat;
474: if (f->out[s])
475: return(1);
476: do {
477: /* assert(*p < NCHARS); */
478: if ((ns = f->gototab[s][*p]) != 0)
479: s = ns;
480: else
481: s = cgoto(f, s, *p);
482: if (f->out[s])
483: return(1);
484: } while (*p++ != 0);
485: return(0);
486: }
487:
488: int pmatch(fa *f, const char *p0) /* longest match, for sub */
489: {
490: int s, ns;
491: uschar *p = (uschar *) p0;
492: uschar *q;
493: int i, k;
494:
495: /* s = f->reset ? makeinit(f,1) : f->initstat; */
496: if (f->reset) {
497: f->initstat = s = makeinit(f,1);
498: } else {
499: s = f->initstat;
500: }
501: patbeg = (char *) p;
502: patlen = -1;
503: do {
504: q = p;
505: do {
506: if (f->out[s]) /* final state */
507: patlen = q-p;
508: /* assert(*q < NCHARS); */
509: if ((ns = f->gototab[s][*q]) != 0)
510: s = ns;
511: else
512: s = cgoto(f, s, *q);
513: if (s == 1) { /* no transition */
514: if (patlen >= 0) {
515: patbeg = (char *) p;
516: return(1);
517: }
518: else
519: goto nextin; /* no match */
520: }
521: } while (*q++ != 0);
522: if (f->out[s])
523: patlen = q-p-1; /* don't count $ */
524: if (patlen >= 0) {
525: patbeg = (char *) p;
526: return(1);
527: }
528: nextin:
529: s = 2;
530: if (f->reset) {
531: for (i = 2; i <= f->curstat; i++)
532: xfree(f->posns[i]);
533: k = *f->posns[0];
534: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
535: overflo("out of space in pmatch");
536: for (i = 0; i <= k; i++)
537: (f->posns[2])[i] = (f->posns[0])[i];
538: f->initstat = f->curstat = 2;
539: f->out[2] = f->out[0];
540: for (i = 0; i < NCHARS; i++)
541: f->gototab[2][i] = 0;
542: }
543: } while (*p++ != 0);
544: return (0);
545: }
546:
547: int nematch(fa *f, const char *p0) /* non-empty match, for sub */
548: {
549: int s, ns;
550: uschar *p = (uschar *) p0;
551: uschar *q;
552: int i, k;
553:
554: /* s = f->reset ? makeinit(f,1) : f->initstat; */
555: if (f->reset) {
556: f->initstat = s = makeinit(f,1);
557: } else {
558: s = f->initstat;
559: }
560: patlen = -1;
561: while (*p) {
562: q = p;
563: do {
564: if (f->out[s]) /* final state */
565: patlen = q-p;
566: /* assert(*q < NCHARS); */
567: if ((ns = f->gototab[s][*q]) != 0)
568: s = ns;
569: else
570: s = cgoto(f, s, *q);
571: if (s == 1) { /* no transition */
572: if (patlen > 0) {
573: patbeg = (char *) p;
574: return(1);
575: } else
576: goto nnextin; /* no nonempty match */
577: }
578: } while (*q++ != 0);
579: if (f->out[s])
580: patlen = q-p-1; /* don't count $ */
581: if (patlen > 0 ) {
582: patbeg = (char *) p;
583: return(1);
584: }
585: nnextin:
586: s = 2;
587: if (f->reset) {
588: for (i = 2; i <= f->curstat; i++)
589: xfree(f->posns[i]);
590: k = *f->posns[0];
591: if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
592: overflo("out of state space");
593: for (i = 0; i <= k; i++)
594: (f->posns[2])[i] = (f->posns[0])[i];
595: f->initstat = f->curstat = 2;
596: f->out[2] = f->out[0];
597: for (i = 0; i < NCHARS; i++)
598: f->gototab[2][i] = 0;
599: }
600: p++;
601: }
602: return (0);
603: }
604:
605: Node *reparse(const char *p) /* parses regular expression pointed to by p */
606: { /* uses relex() to scan regular expression */
607: Node *np;
608:
609: dprintf( ("reparse <%s>\n", p) );
610: lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
611: rtok = relex();
612: /* GNU compatibility: an empty regexp matches anything */
613: if (rtok == '\0') {
614: /* FATAL("empty regular expression"); previous */
615: return(op2(EMPTYRE, NIL, NIL));
616: }
617: np = regexp();
618: if (rtok != '\0')
619: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
620: return(np);
621: }
622:
623: Node *regexp(void) /* top-level parse of reg expr */
624: {
625: return (alt(concat(primary())));
626: }
627:
628: Node *primary(void)
629: {
630: Node *np;
631:
632: switch (rtok) {
633: case CHAR:
634: np = op2(CHAR, NIL, itonp(rlxval));
635: rtok = relex();
636: return (unary(np));
637: case ALL:
638: rtok = relex();
639: return (unary(op2(ALL, NIL, NIL)));
640: case EMPTYRE:
641: rtok = relex();
642: return (unary(op2(ALL, NIL, NIL)));
643: case DOT:
644: rtok = relex();
645: return (unary(op2(DOT, NIL, NIL)));
646: case CCL:
647: np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
648: rtok = relex();
649: return (unary(np));
650: case NCCL:
651: np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
652: rtok = relex();
653: return (unary(np));
654: case '^':
655: rtok = relex();
656: return (unary(op2(CHAR, NIL, itonp(HAT))));
657: case '$':
658: rtok = relex();
659: return (unary(op2(CHAR, NIL, NIL)));
660: case '(':
661: rtok = relex();
662: if (rtok == ')') { /* special pleading for () */
663: rtok = relex();
664: return unary(op2(CCL, NIL, (Node *) tostring("")));
665: }
666: np = regexp();
667: if (rtok == ')') {
668: rtok = relex();
669: return (unary(np));
670: }
671: else
672: FATAL("syntax error in regular expression %s at %s", lastre, prestr);
673: default:
674: FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
675: }
676: return 0; /*NOTREACHED*/
677: }
678:
679: Node *concat(Node *np)
680: {
681: switch (rtok) {
682: case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
683: return (concat(op2(CAT, np, primary())));
684: }
685: return (np);
686: }
687:
688: Node *alt(Node *np)
689: {
690: if (rtok == OR) {
691: rtok = relex();
692: return (alt(op2(OR, np, concat(primary()))));
693: }
694: return (np);
695: }
696:
697: Node *unary(Node *np)
698: {
699: switch (rtok) {
700: case STAR:
701: rtok = relex();
702: return (unary(op2(STAR, np, NIL)));
703: case PLUS:
704: rtok = relex();
705: return (unary(op2(PLUS, np, NIL)));
706: case QUEST:
707: rtok = relex();
708: return (unary(op2(QUEST, np, NIL)));
709: default:
710: return (np);
711: }
712: }
713:
714: /*
715: * Character class definitions conformant to the POSIX locale as
716: * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
717: * and operating character sets are both ASCII (ISO646) or supersets
718: * thereof.
719: *
720: * Note that to avoid overflowing the temporary buffer used in
721: * relex(), the expanded character class (prior to range expansion)
722: * must be less than twice the size of their full name.
723: */
724:
725: /* Because isblank doesn't show up in any of the header files on any
726: * system i use, it's defined here. if some other locale has a richer
727: * definition of "blank", define HAS_ISBLANK and provide your own
728: * version.
729: * the parentheses here are an attempt to find a path through the maze
730: * of macro definition and/or function and/or version provided. thanks
731: * to nelson beebe for the suggestion; let's see if it works everywhere.
732: */
733:
734: /* #define HAS_ISBLANK */
735: #ifndef HAS_ISBLANK
736:
737: int (xisblank)(int c)
738: {
739: return c==' ' || c=='\t';
740: }
741:
742: #endif
743:
744: struct charclass {
745: const char *cc_name;
746: int cc_namelen;
747: int (*cc_func)(int);
748: } charclasses[] = {
749: { "alnum", 5, isalnum },
750: { "alpha", 5, isalpha },
751: #ifndef HAS_ISBLANK
752: { "blank", 5, isspace }, /* was isblank */
753: #else
754: { "blank", 5, isblank },
755: #endif
756: { "cntrl", 5, iscntrl },
757: { "digit", 5, isdigit },
758: { "graph", 5, isgraph },
759: { "lower", 5, islower },
760: { "print", 5, isprint },
761: { "punct", 5, ispunct },
762: { "space", 5, isspace },
763: { "upper", 5, isupper },
764: { "xdigit", 6, isxdigit },
765: { NULL, 0, NULL },
766: };
767:
768:
769: int relex(void) /* lexical analyzer for reparse */
770: {
771: int c, n;
772: int cflag;
773: static uschar *buf = 0;
774: static int bufsz = 100;
775: uschar *bp;
776: struct charclass *cc;
777: int i;
778:
779: switch (c = *prestr++) {
780: case '|': return OR;
781: case '*': return STAR;
782: case '+': return PLUS;
783: case '?': return QUEST;
784: case '.': return DOT;
785: case '\0': prestr--; return '\0';
786: case '^':
787: case '$':
788: case '(':
789: case ')':
790: return c;
791: case '\\':
792: rlxval = quoted(&prestr);
793: return CHAR;
794: default:
795: rlxval = c;
796: return CHAR;
797: case '[':
798: if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
799: FATAL("out of space in reg expr %.10s..", lastre);
800: bp = buf;
801: if (*prestr == '^') {
802: cflag = 1;
803: prestr++;
804: }
805: else
806: cflag = 0;
807: n = 2 * strlen((const char *) prestr)+1;
808: if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
809: FATAL("out of space for reg expr %.10s...", lastre);
810: for (; ; ) {
811: if ((c = *prestr++) == '\\') {
812: *bp++ = '\\';
813: if ((c = *prestr++) == '\0')
814: FATAL("nonterminated character class %.20s...", lastre);
815: *bp++ = c;
816: /* } else if (c == '\n') { */
817: /* FATAL("newline in character class %.20s...", lastre); */
818: } else if (c == '[' && *prestr == ':') {
819: /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
820: for (cc = charclasses; cc->cc_name; cc++)
821: if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
822: break;
823: if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
824: prestr[2 + cc->cc_namelen] == ']') {
825: prestr += cc->cc_namelen + 3;
826: for (i = 0; i < NCHARS; i++) {
827: if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
828: FATAL("out of space for reg expr %.10s...", lastre);
829: if (cc->cc_func(i)) {
830: *bp++ = i;
831: n++;
832: }
833: }
834: } else
835: *bp++ = c;
836: } else if (c == '\0') {
837: FATAL("nonterminated character class %.20s", lastre);
838: } else if (bp == buf) { /* 1st char is special */
839: *bp++ = c;
840: } else if (c == ']') {
841: *bp++ = 0;
842: rlxstr = (uschar *) tostring((char *) buf);
843: if (cflag == 0)
844: return CCL;
845: else
846: return NCCL;
847: } else
848: *bp++ = c;
849: }
850: }
851: }
852:
853: int cgoto(fa *f, int s, int c)
854: {
855: int i, j, k;
856: int *p, *q;
857:
858: assert(c == HAT || c < NCHARS);
859: while (f->accept >= maxsetvec) { /* guessing here! */
860: maxsetvec *= 4;
861: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
862: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
863: if (setvec == 0 || tmpset == 0)
864: overflo("out of space in cgoto()");
865: }
866: for (i = 0; i <= f->accept; i++)
867: setvec[i] = 0;
868: setcnt = 0;
869: /* compute positions of gototab[s,c] into setvec */
870: p = f->posns[s];
871: for (i = 1; i <= *p; i++) {
872: if ((k = f->re[p[i]].ltype) != FINAL) {
873: if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
874: || (k == DOT && c != 0 && c != HAT)
875: || (k == ALL && c != 0)
876: || (k == EMPTYRE && c != 0)
877: || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
878: || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
879: q = f->re[p[i]].lfollow;
880: for (j = 1; j <= *q; j++) {
881: if (q[j] >= maxsetvec) {
882: maxsetvec *= 4;
883: setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
884: tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
885: if (setvec == 0 || tmpset == 0)
886: overflo("cgoto overflow");
887: }
888: if (setvec[q[j]] == 0) {
889: setcnt++;
890: setvec[q[j]] = 1;
891: }
892: }
893: }
894: }
895: }
896: /* determine if setvec is a previous state */
897: tmpset[0] = setcnt;
898: j = 1;
899: for (i = f->accept; i >= 0; i--)
900: if (setvec[i]) {
901: tmpset[j++] = i;
902: }
903: /* tmpset == previous state? */
904: for (i = 1; i <= f->curstat; i++) {
905: p = f->posns[i];
906: if ((k = tmpset[0]) != p[0])
907: goto different;
908: for (j = 1; j <= k; j++)
909: if (tmpset[j] != p[j])
910: goto different;
911: /* setvec is state i */
912: f->gototab[s][c] = i;
913: return i;
914: different:;
915: }
916:
917: /* add tmpset to current set of states */
918: if (f->curstat >= NSTATES-1) {
919: f->curstat = 2;
920: f->reset = 1;
921: for (i = 2; i < NSTATES; i++)
922: xfree(f->posns[i]);
923: } else
924: ++(f->curstat);
925: for (i = 0; i < NCHARS; i++)
926: f->gototab[f->curstat][i] = 0;
927: xfree(f->posns[f->curstat]);
928: if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
929: overflo("out of space in cgoto");
930:
931: f->posns[f->curstat] = p;
932: f->gototab[s][c] = f->curstat;
933: for (i = 0; i <= setcnt; i++)
934: p[i] = tmpset[i];
935: if (setvec[f->accept])
936: f->out[f->curstat] = 1;
937: else
938: f->out[f->curstat] = 0;
939: return f->curstat;
940: }
941:
942:
943: void freefa(fa *f) /* free a finite automaton */
944: {
945: int i;
946:
947: if (f == NULL)
948: return;
949: for (i = 0; i <= f->curstat; i++)
950: xfree(f->posns[i]);
951: for (i = 0; i <= f->accept; i++) {
952: xfree(f->re[i].lfollow);
953: if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
954: xfree((f->re[i].lval.np));
955: }
956: xfree(f->restr);
957: xfree(f);
958: }
CVSweb interface <joel.bertrand@systella.fr>