Annotation of rpl/rplawk/lex.c, revision 1.1
1.1 ! bertrand 1: /****************************************************************
! 2: Copyright (C) Lucent Technologies 1997
! 3: All Rights Reserved
! 4:
! 5: Permission to use, copy, modify, and distribute this software and
! 6: its documentation for any purpose and without fee is hereby
! 7: granted, provided that the above copyright notice appear in all
! 8: copies and that both that the copyright notice and this
! 9: permission notice and warranty disclaimer appear in supporting
! 10: documentation, and that the name Lucent Technologies or any of
! 11: its entities not be used in advertising or publicity pertaining
! 12: to distribution of the software without specific, written prior
! 13: permission.
! 14:
! 15: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
! 16: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
! 17: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
! 18: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
! 19: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
! 20: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
! 21: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
! 22: THIS SOFTWARE.
! 23: ****************************************************************/
! 24:
! 25: #include <stdio.h>
! 26: #include <stdlib.h>
! 27: #include <string.h>
! 28: #include <ctype.h>
! 29: #include "awk.h"
! 30: #include "ytab.h"
! 31:
! 32: extern YYSTYPE yylval;
! 33: extern int infunc;
! 34:
/* Lexer-wide counters, all visible to other translation units. */
int	lineno = 1;	/* current input line: input() adds one per '\n', unput() takes it back */
int	bracecnt = 0;	/* number of '{' seen minus number of '}' seen */
int	brackcnt = 0;	/* same for '[' and ']' */
int	parencnt = 0;	/* same for '(' and ')' */
! 39:
/* One row of the reserved-word table consulted by word(). */
typedef struct Keyword {
	const char	*word;	/* spelling of the keyword or built-in */
	int		sub;	/* value word() stores in yylval.i */
	int		type;	/* token code word() returns to the parser */
} Keyword;
! 45:
! 46: Keyword keywords[] ={ /* keep sorted: binary searched */
! 47: { "BEGIN", XBEGIN, XBEGIN },
! 48: { "END", XEND, XEND },
! 49: { "NF", VARNF, VARNF },
! 50: { "atan2", FATAN, BLTIN },
! 51: { "break", BREAK, BREAK },
! 52: { "close", CLOSE, CLOSE },
! 53: { "continue", CONTINUE, CONTINUE },
! 54: { "cos", FCOS, BLTIN },
! 55: { "delete", DELETE, DELETE },
! 56: { "do", DO, DO },
! 57: { "else", ELSE, ELSE },
! 58: { "exit", EXIT, EXIT },
! 59: { "exp", FEXP, BLTIN },
! 60: { "fflush", FFLUSH, BLTIN },
! 61: { "for", FOR, FOR },
! 62: { "func", FUNC, FUNC },
! 63: { "function", FUNC, FUNC },
! 64: { "getline", GETLINE, GETLINE },
! 65: { "gsub", GSUB, GSUB },
! 66: { "if", IF, IF },
! 67: { "in", IN, IN },
! 68: { "index", INDEX, INDEX },
! 69: { "int", FINT, BLTIN },
! 70: { "length", FLENGTH, BLTIN },
! 71: { "log", FLOG, BLTIN },
! 72: { "match", MATCHFCN, MATCHFCN },
! 73: { "next", NEXT, NEXT },
! 74: { "nextfile", NEXTFILE, NEXTFILE },
! 75: { "print", PRINT, PRINT },
! 76: { "printf", PRINTF, PRINTF },
! 77: { "rand", FRAND, BLTIN },
! 78: { "return", RETURN, RETURN },
! 79: { "sin", FSIN, BLTIN },
! 80: { "split", SPLIT, SPLIT },
! 81: { "sprintf", SPRINTF, SPRINTF },
! 82: { "sqrt", FSQRT, BLTIN },
! 83: { "srand", FSRAND, BLTIN },
! 84: { "sub", SUB, SUB },
! 85: { "substr", SUBSTR, SUBSTR },
! 86: { "system", FSYSTEM, BLTIN },
! 87: { "tolower", FTOLOWER, BLTIN },
! 88: { "toupper", FTOUPPER, BLTIN },
! 89: { "while", WHILE, WHILE },
! 90: };
! 91:
/*
 * RET(x): return token x from the enclosing function, first printing its
 * name when the dbg flag is set.  Wrapped in do { } while (0) so that
 * "RET(x);" is a single statement and stays safe in an unbraced if/else.
 */
#define	RET(x)	do { if(dbg)printf("lex %s\n", tokname(x)); return(x); } while (0)
! 93:
/*
 * peek: report the next input character without consuming it.
 * Implemented as input() followed by unput() of the same character.
 */
int peek(void)
{
	int next;

	next = input();
	unput(next);
	return next;
}
! 100:
/*
 * gettok: read the next raw token into the caller's growable buffer.
 *
 * pbuf/psz point at the buffer and its size; both are written back because
 * adjbuf() may move and enlarge the buffer.  The buffer must hold at least
 * two bytes on entry.
 *
 * Returns:
 *   0     at end of input
 *   'a'   for a name ([A-Za-z_][A-Za-z0-9_]*), text left in *pbuf
 *   '0'   for a number, text left in *pbuf
 *   c     for any other character c, which is also left in *pbuf
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Keep room for this character AND the terminator:
			 * the loop can end on end-of-input right after a
			 * store, and the "*bp = 0" below must stay in bounds.
			 */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same headroom rule as in the name loop above */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push back everything after the first character
			 * BEFORE truncating; truncating first would leave an
			 * empty string at rem+1 and lose those characters.
			 */
			unputstr(rem+1);	/* put rest back for later */
			buf[1] = 0;		/* return one character as token */
			retc = buf[0];		/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
! 163:
/* Helpers defined further down in this file. */
int	word(char *);
int	string(void);
int	regexpr(void);

/* One-shot flags examined at the top of yylex(). */
int	sc	= 0;	/* 1 => return a } right now */
int	reg	= 0;	/* 1 => return a REGEXPR now */
! 169:
! 170: int yylex(void)
! 171: {
! 172: int c;
! 173: static char *buf = 0;
! 174: static int bufsize = 5; /* BUG: setting this small causes core dump! */
! 175:
! 176: if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
! 177: FATAL( "out of space in yylex" );
! 178: if (sc) {
! 179: sc = 0;
! 180: RET('}');
! 181: }
! 182: if (reg) {
! 183: reg = 0;
! 184: return regexpr();
! 185: }
! 186: for (;;) {
! 187: c = gettok(&buf, &bufsize);
! 188: if (c == 0)
! 189: return 0;
! 190: if (isalpha(c) || c == '_')
! 191: return word(buf);
! 192: if (isdigit(c)) {
! 193: yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
! 194: /* should this also have STR set? */
! 195: RET(NUMBER);
! 196: }
! 197:
! 198: yylval.i = c;
! 199: switch (c) {
! 200: case '\n': /* {EOL} */
! 201: RET(NL);
! 202: case '\r': /* assume \n is coming */
! 203: case ' ': /* {WS}+ */
! 204: case '\t':
! 205: break;
! 206: case '#': /* #.* strip comments */
! 207: while ((c = input()) != '\n' && c != 0)
! 208: ;
! 209: unput(c);
! 210: break;
! 211: case ';':
! 212: RET(';');
! 213: case '\\':
! 214: if (peek() == '\n') {
! 215: input();
! 216: } else if (peek() == '\r') {
! 217: input(); input(); /* \n */
! 218: lineno++;
! 219: } else {
! 220: RET(c);
! 221: }
! 222: break;
! 223: case '&':
! 224: if (peek() == '&') {
! 225: input(); RET(AND);
! 226: } else
! 227: RET('&');
! 228: case '|':
! 229: if (peek() == '|') {
! 230: input(); RET(BOR);
! 231: } else
! 232: RET('|');
! 233: case '!':
! 234: if (peek() == '=') {
! 235: input(); yylval.i = NE; RET(NE);
! 236: } else if (peek() == '~') {
! 237: input(); yylval.i = NOTMATCH; RET(MATCHOP);
! 238: } else
! 239: RET(NOT);
! 240: case '~':
! 241: yylval.i = MATCH;
! 242: RET(MATCHOP);
! 243: case '<':
! 244: if (peek() == '=') {
! 245: input(); yylval.i = LE; RET(LE);
! 246: } else {
! 247: yylval.i = LT; RET(LT);
! 248: }
! 249: case '=':
! 250: if (peek() == '=') {
! 251: input(); yylval.i = EQ; RET(EQ);
! 252: } else {
! 253: yylval.i = ASSIGN; RET(ASGNOP);
! 254: }
! 255: case '>':
! 256: if (peek() == '=') {
! 257: input(); yylval.i = GE; RET(GE);
! 258: } else if (peek() == '>') {
! 259: input(); yylval.i = APPEND; RET(APPEND);
! 260: } else {
! 261: yylval.i = GT; RET(GT);
! 262: }
! 263: case '+':
! 264: if (peek() == '+') {
! 265: input(); yylval.i = INCR; RET(INCR);
! 266: } else if (peek() == '=') {
! 267: input(); yylval.i = ADDEQ; RET(ASGNOP);
! 268: } else
! 269: RET('+');
! 270: case '-':
! 271: if (peek() == '-') {
! 272: input(); yylval.i = DECR; RET(DECR);
! 273: } else if (peek() == '=') {
! 274: input(); yylval.i = SUBEQ; RET(ASGNOP);
! 275: } else
! 276: RET('-');
! 277: case '*':
! 278: if (peek() == '=') { /* *= */
! 279: input(); yylval.i = MULTEQ; RET(ASGNOP);
! 280: } else if (peek() == '*') { /* ** or **= */
! 281: input(); /* eat 2nd * */
! 282: if (peek() == '=') {
! 283: input(); yylval.i = POWEQ; RET(ASGNOP);
! 284: } else {
! 285: RET(POWER);
! 286: }
! 287: } else
! 288: RET('*');
! 289: case '/':
! 290: RET('/');
! 291: case '%':
! 292: if (peek() == '=') {
! 293: input(); yylval.i = MODEQ; RET(ASGNOP);
! 294: } else
! 295: RET('%');
! 296: case '^':
! 297: if (peek() == '=') {
! 298: input(); yylval.i = POWEQ; RET(ASGNOP);
! 299: } else
! 300: RET(POWER);
! 301:
! 302: case '$':
! 303: /* BUG: awkward, if not wrong */
! 304: c = gettok(&buf, &bufsize);
! 305: if (isalpha(c)) {
! 306: if (strcmp(buf, "NF") == 0) { /* very special */
! 307: unputstr("(NF)");
! 308: RET(INDIRECT);
! 309: }
! 310: c = peek();
! 311: if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
! 312: unputstr(buf);
! 313: RET(INDIRECT);
! 314: }
! 315: yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
! 316: RET(IVAR);
! 317: } else if (c == 0) { /* */
! 318: SYNTAX( "unexpected end of input after $" );
! 319: RET(';');
! 320: } else {
! 321: unputstr(buf);
! 322: RET(INDIRECT);
! 323: }
! 324:
! 325: case '}':
! 326: if (--bracecnt < 0)
! 327: SYNTAX( "extra }" );
! 328: sc = 1;
! 329: RET(';');
! 330: case ']':
! 331: if (--brackcnt < 0)
! 332: SYNTAX( "extra ]" );
! 333: RET(']');
! 334: case ')':
! 335: if (--parencnt < 0)
! 336: SYNTAX( "extra )" );
! 337: RET(')');
! 338: case '{':
! 339: bracecnt++;
! 340: RET('{');
! 341: case '[':
! 342: brackcnt++;
! 343: RET('[');
! 344: case '(':
! 345: parencnt++;
! 346: RET('(');
! 347:
! 348: case '"':
! 349: return string(); /* BUG: should be like tran.c ? */
! 350:
! 351: default:
! 352: RET(c);
! 353: }
! 354: }
! 355: }
! 356:
! 357: int string(void)
! 358: {
! 359: int c, n;
! 360: char *s, *bp;
! 361: static char *buf = 0;
! 362: static int bufsz = 500;
! 363:
! 364: if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
! 365: FATAL("out of space for strings");
! 366: for (bp = buf; (c = input()) != '"'; ) {
! 367: if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
! 368: FATAL("out of space for string %.10s...", buf);
! 369: switch (c) {
! 370: case '\n':
! 371: case '\r':
! 372: case 0:
! 373: SYNTAX( "non-terminated string %.10s...", buf );
! 374: lineno++;
! 375: if (c == 0) /* hopeless */
! 376: FATAL( "giving up" );
! 377: break;
! 378: case '\\':
! 379: c = input();
! 380: switch (c) {
! 381: case '"': *bp++ = '"'; break;
! 382: case 'n': *bp++ = '\n'; break;
! 383: case 't': *bp++ = '\t'; break;
! 384: case 'f': *bp++ = '\f'; break;
! 385: case 'r': *bp++ = '\r'; break;
! 386: case 'b': *bp++ = '\b'; break;
! 387: case 'v': *bp++ = '\v'; break;
! 388: case 'a': *bp++ = '\007'; break;
! 389: case '\\': *bp++ = '\\'; break;
! 390:
! 391: case '0': case '1': case '2': /* octal: \d \dd \ddd */
! 392: case '3': case '4': case '5': case '6': case '7':
! 393: n = c - '0';
! 394: if ((c = peek()) >= '0' && c < '8') {
! 395: n = 8 * n + input() - '0';
! 396: if ((c = peek()) >= '0' && c < '8')
! 397: n = 8 * n + input() - '0';
! 398: }
! 399: *bp++ = n;
! 400: break;
! 401:
! 402: case 'x': /* hex \x0-9a-fA-F + */
! 403: { char xbuf[100], *px;
! 404: for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
! 405: if (isdigit(c)
! 406: || (c >= 'a' && c <= 'f')
! 407: || (c >= 'A' && c <= 'F'))
! 408: *px++ = c;
! 409: else
! 410: break;
! 411: }
! 412: *px = 0;
! 413: unput(c);
! 414: sscanf(xbuf, "%x", &n);
! 415: *bp++ = n;
! 416: break;
! 417: }
! 418:
! 419: default:
! 420: *bp++ = c;
! 421: break;
! 422: }
! 423: break;
! 424: default:
! 425: *bp++ = c;
! 426: break;
! 427: }
! 428: }
! 429: *bp = 0;
! 430: s = tostring(buf);
! 431: *bp++ = ' '; *bp++ = 0;
! 432: yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
! 433: RET(STRING);
! 434: }
! 435:
! 436:
! 437: int binsearch(char *w, Keyword *kp, int n)
! 438: {
! 439: int cond, low, mid, high;
! 440:
! 441: low = 0;
! 442: high = n - 1;
! 443: while (low <= high) {
! 444: mid = (low + high) / 2;
! 445: if ((cond = strcmp(w, kp[mid].word)) < 0)
! 446: high = mid - 1;
! 447: else if (cond > 0)
! 448: low = mid + 1;
! 449: else
! 450: return mid;
! 451: }
! 452: return -1;
! 453: }
! 454:
! 455: int word(char *w)
! 456: {
! 457: Keyword *kp;
! 458: int c, n;
! 459:
! 460: n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
! 461: /* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
! 462: kp = keywords + n;
! 463: if (n != -1) { /* found in table */
! 464: yylval.i = kp->sub;
! 465: switch (kp->type) { /* special handling */
! 466: case BLTIN:
! 467: if (kp->sub == FSYSTEM && safe)
! 468: SYNTAX( "system is unsafe" );
! 469: RET(kp->type);
! 470: case FUNC:
! 471: if (infunc)
! 472: SYNTAX( "illegal nested function" );
! 473: RET(kp->type);
! 474: case RETURN:
! 475: if (!infunc)
! 476: SYNTAX( "return not in function" );
! 477: RET(kp->type);
! 478: case VARNF:
! 479: yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
! 480: RET(VARNF);
! 481: default:
! 482: RET(kp->type);
! 483: }
! 484: }
! 485: c = peek(); /* look for '(' */
! 486: if (c != '(' && infunc && (n=isarg(w)) >= 0) {
! 487: yylval.i = n;
! 488: RET(ARG);
! 489: } else {
! 490: yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
! 491: if (c == '(') {
! 492: RET(CALL);
! 493: } else {
! 494: RET(VAR);
! 495: }
! 496: }
! 497: }
! 498:
! 499: void startreg(void) /* next call to yylex will return a regular expression */
! 500: {
! 501: reg = 1;
! 502: }
! 503:
! 504: int regexpr(void)
! 505: {
! 506: int c;
! 507: static char *buf = 0;
! 508: static int bufsz = 500;
! 509: char *bp;
! 510:
! 511: if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
! 512: FATAL("out of space for rex expr");
! 513: bp = buf;
! 514: for ( ; (c = input()) != '/' && c != 0; ) {
! 515: if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
! 516: FATAL("out of space for reg expr %.10s...", buf);
! 517: if (c == '\n') {
! 518: SYNTAX( "newline in regular expression %.10s...", buf );
! 519: unput('\n');
! 520: break;
! 521: } else if (c == '\\') {
! 522: *bp++ = '\\';
! 523: *bp++ = input();
! 524: } else {
! 525: *bp++ = c;
! 526: }
! 527: }
! 528: *bp = 0;
! 529: if (c == 0)
! 530: SYNTAX("non-terminated regular expression %.10s...", buf);
! 531: yylval.s = tostring(buf);
! 532: unput('/');
! 533: RET(REGEXPR);
! 534: }
! 535:
/* low-level lexical stuff, sort of inherited from lex */

char	ebuf[300];		/* ring of the most recently read characters, filled by input() */
char	*ep = ebuf;		/* next slot to fill in ebuf */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the most recently pushed-back character */
FILE	*yyin = 0;
! 543:
! 544: int input(void) /* get next lexical input character */
! 545: {
! 546: int c;
! 547: extern char *lexprog;
! 548:
! 549: if (yysptr > yysbuf)
! 550: c = (uschar)*--yysptr;
! 551: else if (lexprog != NULL) { /* awk '...' */
! 552: if ((c = (uschar)*lexprog) != 0)
! 553: lexprog++;
! 554: } else /* awk -f ... */
! 555: c = pgetc();
! 556: if (c == '\n')
! 557: lineno++;
! 558: else if (c == EOF)
! 559: c = 0;
! 560: if (ep >= ebuf + sizeof ebuf)
! 561: ep = ebuf;
! 562: return *ep++ = c;
! 563: }
! 564:
! 565: void unput(int c) /* put lexical character back on input */
! 566: {
! 567: if (c == '\n')
! 568: lineno--;
! 569: if (yysptr >= yysbuf + sizeof(yysbuf))
! 570: FATAL("pushed back too much: %.20s...", yysbuf);
! 571: *yysptr++ = c;
! 572: if (--ep < ebuf)
! 573: ep = ebuf + sizeof(ebuf) - 1;
! 574: }
! 575:
/*
 * unputstr: put a string back on input.
 * Characters are pushed last-first so that input() returns them in their
 * original order.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	const char *p;

	for (p = s + strlen(s); p > s; )
		unput(*--p);
}
CVSweb interface <joel.bertrand@systella.fr>